#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check the error code since, before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
#if (CUDART_VERSION < 10000)
    mtype = attr.memoryType;
#else
    mtype = attr.type;
#endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* Do not check the error code, for the same reason as in the CUDA case above */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined patterns, such as those of MPI_Alltoall, MPI_Allgatherv,
   or MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
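
/*
   Illustrative sketch (not compiled as part of this file) of the typical PetscSF life cycle around
   PetscSFCreate()/PetscSFDestroy(); the graph itself is supplied later with PetscSFSetGraph() or
   PetscSFSetGraphWithPattern(). All names below are example placeholders.

     PetscErrorCode ierr;
     PetscSF        sf;

     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);         (honor -sf_type and related options)
     ... set the graph, call PetscSFSetUp(), communicate ...
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/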

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
    of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC  - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type                 - implementation type, see PetscSFSetType()
.  -sf_rank_order           - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream   - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                              use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                              If true, this option only works with -use_cuda_aware_mpi 1.
-  -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                              If true, this option only works with -use_cuda_aware_mpi 1.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);

#if defined(PETSC_HAVE_DEVICE)
  sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  sf->use_stream_aware_mpi = PETSC_FALSE;
  ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
#endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
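
/*
   Illustrative sketch (not compiled as part of this file): build a small SF with PetscSFSetGraph() when
   run on exactly two MPI ranks. Each rank declares two roots and two leaves, and leaf i on a rank
   references root i on the other rank; a broadcast then copies root values to the leaves. All names and
   sizes are made up for the example.

     PetscErrorCode ierr;
     PetscSF        sf;
     PetscSFNode    remote[2];
     PetscInt       rootdata[2],leafdata[2],i;
     PetscMPIInt    rank;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     for (i=0; i<2; i++) {remote[i].rank = 1-rank; remote[i].index = i;}   (leaf i references root i on the other rank)
     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     for (i=0; i<2; i++) rootdata[i] = 10*rank+i;
     ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);   (leafdata now holds the other rank's rootdata)
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/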

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are the local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
  sequential vector y on rank 0.

  In the above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
  creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
  of rank j, for 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this does
  not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n is the same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i<nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
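
/*
   Illustrative sketch (not compiled as part of this file): use PetscSFSetGraphWithPattern(), defined above,
   to build an allgather-like SF from the layout of an existing MPI vector x, then broadcast the local
   entries of x (the roots) into a full-length local array y on every rank (the leaves). The vector x and
   the array y are example placeholders.

     PetscErrorCode    ierr;
     PetscSF           sf;
     PetscLayout       map;
     const PetscScalar *xarray;
     PetscScalar       *y;
     PetscInt          N;

     ierr = VecGetLayout(x,&map);CHKERRQ(ierr);
     ierr = VecGetSize(x,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&y);CHKERRQ(ierr);
     ierr = PetscSFCreate(PetscObjectComm((PetscObject)x),&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,xarray,y);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,xarray,y);CHKERRQ(ierr);        (y now holds all N entries of x on every rank)
     ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/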

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}
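
/*
   Illustrative sketch (not compiled as part of this file): query an existing SF named sf with
   PetscSFGetGraph() and print each local leaf together with the (rank,index) of the root it references.
   Handles both contiguous (ilocal == NULL) and non-contiguous leaf storage.

     PetscErrorCode    ierr;
     PetscInt          nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;
       ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (%D,%D)\n",leaf,iremote[i].rank,iremote[i].index);CHKERRQ(ierr);
     }
*/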

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}
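
/*
   Illustrative sketch (not compiled as part of this file): walk the per-rank CSR-like layout returned by
   PetscSFGetRootRanks() for an already set-up SF named sf. For each connected remote rank ranks[i], the
   leaf/root index pairs are rmine[j]/rremote[j] for roffset[i] <= j < roffset[i+1].

     PetscErrorCode    ierr;
     PetscInt          nranks,i,j;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset,*rmine,*rremote;

     ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
     for (i=0; i<nranks; i++) {
       for (j=roffset[i]; j<roffset[i+1]; j++) {
         ierr = PetscPrintf(PETSC_COMM_SELF,"local leaf %D references root %D on rank %d\n",rmine[j],rremote[j],ranks[i]);CHKERRQ(ierr);
       }
     }
*/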

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list)
{
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:
   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1;   /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
    if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
    for (i=0; i<nselected; i++)
      if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
  }

  if (sf->ops->CreateEmbeddedSF) {
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf */
    ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
    maxlocal = maxleaf - minleaf + 1;
    ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
    leafdata = leafmem - minleaf;
    /* Tag selected roots and bcast to leaves */
    for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
    ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
    for (i=n=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
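
/*
   Illustrative sketch (not compiled as part of this file): restrict communication of an existing SF named
   sf to the roots listed in selected[] using PetscSFCreateEmbeddedSF(), assuming the SF has at least two
   local roots; the sizes are example placeholders.

     PetscErrorCode ierr;
     PetscSF        esf;
     PetscInt       selected[2] = {0,1};

     ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
     ... use esf like sf, e.g. with PetscSFBcastBegin()/PetscSFBcastEnd(); leaf indices are unchanged ...
     ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
*/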

/*@C
   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected leaves on this process
-  selected   - indices of the selected leaves on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

.seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Uniq selected[] and put results in leaves[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
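
/*
   Illustrative sketch (not compiled as part of this file): accumulate leaf contributions into roots with the
   PetscSFReduceBegin()/PetscSFReduceEnd() pair (PetscSFReduceEnd() is defined just below). The SF named sf is
   assumed to use contiguous leaf storage (ilocal == NULL); nroots and nleaves are example placeholders
   obtained from PetscSFGetGraph().

     PetscErrorCode ierr;
     PetscInt       *leafdata,*rootdata,i;

     ierr = PetscMalloc2(nleaves,&leafdata,nroots,&rootdata);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) leafdata[i] = 1;                 (each leaf contributes 1)
     for (i=0; i<nroots;  i++) rootdata[i] = 0;
     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
     rootdata[r] now holds the number of leaves referencing root r, i.e. its degree
     ierr = PetscFree2(leafdata,rootdata);CHKERRQ(ierr);
*/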

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
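
/*
   Illustrative sketch (not compiled as part of this file): use the PetscSFFetchAndOpBegin()/PetscSFFetchAndOpEnd()
   pair (the End routine is defined just below) to hand out consecutive slots at each root, much as
   PetscSFGetMultiSF() does internally. Assumes contiguous leaf storage; sf, nroots, and nleaves are example
   placeholders.

     PetscErrorCode ierr;
     PetscInt       *rootcount,*ones,*slot,i;

     ierr = PetscMalloc3(nroots,&rootcount,nleaves,&ones,nleaves,&slot);CHKERRQ(ierr);
     for (i=0; i<nroots;  i++) rootcount[i] = 0;
     for (i=0; i<nleaves; i++) ones[i] = 1;
     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,ones,slot,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,ones,slot,MPI_SUM);CHKERRQ(ierr);
     slot[i] is the counter value at leaf i's root just before this leaf's update, so leaves referencing
     the same root receive distinct slots 0,1,2,...
     ierr = PetscFree3(rootcount,ones,slot);CHKERRQ(ierr);
*/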
leafdata - leaf values to use in reduction 1530 - op - operation to use for reduction 1531 1532 Output Arguments: 1533 + rootdata - root values to be updated, input state is seen by first process to perform an update 1534 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1535 1536 Level: advanced 1537 1538 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1539 @*/ 1540 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1541 { 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1546 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1547 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1548 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1549 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 /*@C 1554 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1555 1556 Collective 1557 1558 Input Arguments: 1559 . sf - star forest 1560 1561 Output Arguments: 1562 . degree - degree of each root vertex 1563 1564 Level: advanced 1565 1566 Notes: 1567 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1568 1569 .seealso: PetscSFGatherBegin() 1570 @*/ 1571 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1572 { 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1577 PetscSFCheckGraphSet(sf,1); 1578 PetscValidPointer(degree,2); 1579 if (!sf->degreeknown) { 1580 PetscInt i, nroots = sf->nroots, maxlocal; 1581 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1582 maxlocal = sf->maxleaf-sf->minleaf+1; 1583 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1584 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1585 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1586 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1587 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1588 } 1589 *degree = NULL; 1590 PetscFunctionReturn(0); 1591 } 1592 1593 /*@C 1594 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1595 1596 Collective 1597 1598 Input Arguments: 1599 . sf - star forest 1600 1601 Output Arguments: 1602 . degree - degree of each root vertex 1603 1604 Level: developer 1605 1606 Notes: 1607 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
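   Example Usage:
   A minimal sketch of the Begin/End pairing (error checking omitted; sf is assumed to already have its graph set):
.vb
   const PetscInt *degree;
   PetscInt        i,nroots;

   PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
   PetscSFComputeDegreeBegin(sf,&degree);
   /* unrelated local work may be overlapped with the communication here */
   PetscSFComputeDegreeEnd(sf,&degree);
   for (i=0; i<nroots; i++) {PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);}
   /* degree is owned by sf; do not PetscFree() it */
.ve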
1608 1609 .seealso: PetscSFComputeDegreeBegin() 1610 @*/ 1611 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1612 { 1613 PetscErrorCode ierr; 1614 1615 PetscFunctionBegin; 1616 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1617 PetscSFCheckGraphSet(sf,1); 1618 PetscValidPointer(degree,2); 1619 if (!sf->degreeknown) { 1620 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1621 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1622 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1623 sf->degreeknown = PETSC_TRUE; 1624 } 1625 *degree = sf->degree; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 1630 /*@C 1631 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1632 Each multi-root is assigned the index of the corresponding original root. 1633 1634 Collective 1635 1636 Input Arguments: 1637 + sf - star forest 1638 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1639 1640 Output Arguments: 1641 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1642 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1643 1644 Level: developer 1645 1646 Notes: 1647 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1648 1649 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1650 @*/ 1651 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1652 { 1653 PetscSF msf; 1654 PetscInt i, j, k, nroots, nmroots; 1655 PetscErrorCode ierr; 1656 1657 PetscFunctionBegin; 1658 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1659 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1660 if (nroots) PetscValidIntPointer(degree,2); 1661 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1662 PetscValidPointer(multiRootsOrigNumbering,4); 1663 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1664 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1665 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1666 for (i=0,j=0,k=0; i<nroots; i++) { 1667 if (!degree[i]) continue; 1668 for (j=0; j<degree[i]; j++,k++) { 1669 (*multiRootsOrigNumbering)[k] = i; 1670 } 1671 } 1672 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1673 if (nMultiRoots) *nMultiRoots = nmroots; 1674 PetscFunctionReturn(0); 1675 } 1676 1677 /*@C 1678 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1679 1680 Collective 1681 1682 Input Arguments: 1683 + sf - star forest 1684 . unit - data type 1685 - leafdata - leaf data to gather to roots 1686 1687 Output Argument: 1688 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1689 1690 Level: intermediate 1691 1692 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1693 @*/ 1694 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1695 { 1696 PetscErrorCode ierr; 1697 PetscSF multi = NULL; 1698 1699 PetscFunctionBegin; 1700 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1701 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1702 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1703 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 /*@C 1708 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1709 1710 Collective 1711 1712 Input Arguments: 1713 + sf - star forest 1714 . unit - data type 1715 - leafdata - leaf data to gather to roots 1716 1717 Output Argument: 1718 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1719 1720 Level: intermediate 1721 1722 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1723 @*/ 1724 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1725 { 1726 PetscErrorCode ierr; 1727 PetscSF multi = NULL; 1728 1729 PetscFunctionBegin; 1730 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1731 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1732 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1733 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1734 PetscFunctionReturn(0); 1735 } 1736 1737 /*@C 1738 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1739 1740 Collective 1741 1742 Input Arguments: 1743 + sf - star forest 1744 . unit - data type 1745 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1746 1747 Output Argument: 1748 . leafdata - leaf data to be updated with personal data from each respective root 1749 1750 Level: intermediate 1751 1752 .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin() 1753 @*/ 1754 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1755 { 1756 PetscErrorCode ierr; 1757 PetscSF multi = NULL; 1758 1759 PetscFunctionBegin; 1760 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1761 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1762 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1763 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 1767 /*@C 1768 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1769 1770 Collective 1771 1772 Input Arguments: 1773 + sf - star forest 1774 . unit - data type 1775 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1776 1777 Output Argument: 1778 . 
leafdata - leaf data to be updated with personal data from each respective root 1779 1780 Level: intermediate 1781 1782 .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd() 1783 @*/ 1784 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1785 { 1786 PetscErrorCode ierr; 1787 PetscSF multi = NULL; 1788 1789 PetscFunctionBegin; 1790 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1791 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1792 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1793 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1794 PetscFunctionReturn(0); 1795 } 1796 1797 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1798 { 1799 PetscInt i, n, nleaves; 1800 const PetscInt *ilocal = NULL; 1801 PetscHSetI seen; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 1806 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1807 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1808 for (i = 0; i < nleaves; i++) { 1809 const PetscInt leaf = ilocal ? ilocal[i] : i; 1810 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1811 } 1812 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1813 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1814 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1815 PetscFunctionReturn(0); 1816 } 1817 1818 /*@ 1819 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1820 1821 Input Parameters: 1822 + sfA - The first PetscSF 1823 - sfB - The second PetscSF 1824 1825 Output Parameters: 1826 . sfBA - The composite SF 1827 1828 Level: developer 1829 1830 Notes: 1831 Currently, the two SFs must be defined on congruent communicators and they must be true star 1832 forests, i.e. the same leaf is not connected with different roots. 1833 1834 sfA's leaf space and sfB's root space might partially overlap. The composition builds 1835 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1836 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1837 Bcast on sfA, then a Bcast on sfB, on connected nodes. 
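   Example Usage:
   A minimal sketch (error checking omitted; sfA and sfB are assumed to be already created on the same communicator with their graphs set, the leaves of sfA indexing into the root space of sfB):
.vb
   PetscSF sfBA;

   PetscSFCompose(sfA,sfB,&sfBA);
   /* a Bcast on sfBA is equivalent to a Bcast on sfA followed by a Bcast on sfB, on connected nodes */
   PetscSFDestroy(&sfBA);
.ve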
1838 1839 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1840 @*/ 1841 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1842 { 1843 PetscErrorCode ierr; 1844 const PetscSFNode *remotePointsA,*remotePointsB; 1845 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1846 const PetscInt *localPointsA,*localPointsB; 1847 PetscInt *localPointsBA; 1848 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1849 PetscBool denseB; 1850 1851 PetscFunctionBegin; 1852 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1853 PetscSFCheckGraphSet(sfA,1); 1854 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1855 PetscSFCheckGraphSet(sfB,2); 1856 PetscCheckSameComm(sfA,1,sfB,2); 1857 PetscValidPointer(sfBA,3); 1858 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1859 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1860 1861 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1862 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1863 if (localPointsA) { 1864 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 1865 for (i=0; i<numRootsB; i++) { 1866 reorderedRemotePointsA[i].rank = -1; 1867 reorderedRemotePointsA[i].index = -1; 1868 } 1869 for (i=0; i<numLeavesA; i++) { 1870 if (localPointsA[i] >= numRootsB) continue; 1871 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1872 } 1873 remotePointsA = reorderedRemotePointsA; 1874 } 1875 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 1876 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 1877 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1878 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1879 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1880 1881 denseB = (PetscBool)!localPointsB; 1882 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1883 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 1884 else numLeavesBA++; 1885 } 1886 if (denseB) { 1887 localPointsBA = NULL; 1888 remotePointsBA = leafdataB; 1889 } else { 1890 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 1891 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 1892 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1893 const PetscInt l = localPointsB ? localPointsB[i] : i; 1894 1895 if (leafdataB[l-minleaf].rank == -1) continue; 1896 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 1897 localPointsBA[numLeavesBA] = l; 1898 numLeavesBA++; 1899 } 1900 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 1901 } 1902 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 1903 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1904 PetscFunctionReturn(0); 1905 } 1906 1907 /*@ 1908 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 1909 1910 Input Parameters: 1911 + sfA - The first PetscSF 1912 - sfB - The second PetscSF 1913 1914 Output Parameters: 1915 . sfBA - The composite SF. 1916 1917 Level: developer 1918 1919 Notes: 1920 Currently, the two SFs must be defined on congruent communicators and they must be true star 1921 forests, i.e. the same leaf is not connected with different roots. 
Moreover, all roots of the 1922 second SF must have a degree of 1, i.e., no root has more than one connected leaf. 1923 1924 sfA's leaf space and sfB's leaf space might partially overlap. The composition builds 1925 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 1926 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 1927 a Reduce on sfB, on connected roots. 1928 1929 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 1930 @*/ 1931 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1932 { 1933 PetscErrorCode ierr; 1934 const PetscSFNode *remotePointsA,*remotePointsB; 1935 PetscSFNode *remotePointsBA; 1936 const PetscInt *localPointsA,*localPointsB; 1937 PetscSFNode *reorderedRemotePointsA = NULL; 1938 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 1939 MPI_Op op; 1940 #if defined(PETSC_USE_64BIT_INDICES) 1941 PetscBool iswin; 1942 #endif 1943 1944 PetscFunctionBegin; 1945 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1946 PetscSFCheckGraphSet(sfA,1); 1947 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1948 PetscSFCheckGraphSet(sfB,2); 1949 PetscCheckSameComm(sfA,1,sfB,2); 1950 PetscValidPointer(sfBA,3); 1951 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1952 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1953 1954 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 1955 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 1956 1957 /* TODO: Check roots of sfB have degree of 1 */ 1958 /* Once we implement it, we can replace the MPI_MAXLOC 1959 with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect. 1960 We use MPI_MAXLOC only to have a deterministic output from this routine if 1961 the root condition is not met. 
1962 */ 1963 op = MPI_MAXLOC; 1964 #if defined(PETSC_USE_64BIT_INDICES) 1965 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 1966 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 1967 if (iswin) op = MPIU_REPLACE; 1968 #endif 1969 1970 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 1971 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 1972 for (i=0; i<maxleaf - minleaf + 1; i++) { 1973 reorderedRemotePointsA[i].rank = -1; 1974 reorderedRemotePointsA[i].index = -1; 1975 } 1976 if (localPointsA) { 1977 for (i=0; i<numLeavesA; i++) { 1978 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 1979 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 1980 } 1981 } else { 1982 for (i=0; i<numLeavesA; i++) { 1983 if (i > maxleaf || i < minleaf) continue; 1984 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 1985 } 1986 } 1987 1988 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 1989 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 1990 for (i=0; i<numRootsB; i++) { 1991 remotePointsBA[i].rank = -1; 1992 remotePointsBA[i].index = -1; 1993 } 1994 1995 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 1996 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 1997 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1998 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 1999 if (remotePointsBA[i].rank == -1) continue; 2000 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2001 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2002 localPointsBA[numLeavesBA] = i; 2003 numLeavesBA++; 2004 } 2005 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2006 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 /* 2011 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2012 2013 Input Parameters: 2014 . sf - The global PetscSF 2015 2016 Output Parameters: 2017 . 
out - The local PetscSF 2018 */ 2019 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2020 { 2021 MPI_Comm comm; 2022 PetscMPIInt myrank; 2023 const PetscInt *ilocal; 2024 const PetscSFNode *iremote; 2025 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2026 PetscSFNode *liremote; 2027 PetscSF lsf; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2032 if (sf->ops->CreateLocalSF) { 2033 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2034 } else { 2035 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2036 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2037 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 2038 2039 /* Find out local edges and build a local SF */ 2040 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2041 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2042 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2043 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2044 2045 for (i=j=0; i<nleaves; i++) { 2046 if (iremote[i].rank == (PetscInt)myrank) { 2047 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2048 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2049 liremote[j].index = iremote[i].index; 2050 j++; 2051 } 2052 } 2053 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2054 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2055 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2056 *out = lsf; 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2062 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2063 { 2064 PetscErrorCode ierr; 2065 PetscMemType rootmtype,leafmtype; 2066 2067 PetscFunctionBegin; 2068 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2069 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2070 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2071 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2072 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2073 if (sf->ops->BcastToZero) { 2074 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2075 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2076 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080
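/*
   Typical usage of the split-phase operations implemented above (a sketch; sf, rootdata and leafdata are
   caller-provided, with buffer sizes matching the graph given to PetscSFSetGraph(); with op = MPIU_REPLACE
   the BcastAndOp pair acts as a plain broadcast):

     PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);
       ... unrelated local work overlapped with the communication ...
     PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
       ... unrelated local work overlapped with the communication ...
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
*/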