#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
# define PetscSFCheckGraphSet(sf,arg) do { \
    if (PetscUnlikely(!(sf)->graphset)) \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv, or
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and perform better than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
#endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.
sf - star forest 107 108 Level: advanced 109 110 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 118 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 127 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 128 sf->nranks = -1; 129 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 130 sf->degreeknown = PETSC_FALSE; 131 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 132 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 133 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 134 if (sf->multi) sf->multi->multi = NULL; 135 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 136 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 137 138 #if defined(PETSC_HAVE_DEVICE) 139 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 140 #endif 141 142 sf->setupcalled = PETSC_FALSE; 143 PetscFunctionReturn(0); 144 } 145 146 /*@C 147 PetscSFSetType - Set the PetscSF communication implementation 148 149 Collective on PetscSF 150 151 Input Parameters: 152 + sf - the PetscSF context 153 - type - a known method 154 155 Options Database Key: 156 . -sf_type <type> - Sets the method; use -help for a list 157 of available methods (for instance, window, basic, neighbor) 158 159 Notes: 160 See "include/petscsf.h" for available methods (for instance) 161 + PETSCSFWINDOW - MPI-2/3 one-sided 162 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 163 164 Level: intermediate 165 166 .seealso: PetscSFType, PetscSFCreate() 167 @*/ 168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 169 { 170 PetscErrorCode ierr,(*r)(PetscSF); 171 PetscBool match; 172 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 175 PetscValidCharPointer(type,2); 176 177 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 178 if (match) PetscFunctionReturn(0); 179 180 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 181 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 182 /* Destroy the previous PetscSF implementation context */ 183 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 184 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 185 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 186 ierr = (*r)(sf);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 /*@C 191 PetscSFGetType - Get the PetscSF communication implementation 192 193 Not Collective 194 195 Input Parameter: 196 . sf - the PetscSF context 197 198 Output Parameter: 199 . 
type - the PetscSF type name 200 201 Level: intermediate 202 203 .seealso: PetscSFSetType(), PetscSFCreate() 204 @*/ 205 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 206 { 207 PetscFunctionBegin; 208 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 209 PetscValidPointer(type,2); 210 *type = ((PetscObject)sf)->type_name; 211 PetscFunctionReturn(0); 212 } 213 214 /*@C 215 PetscSFDestroy - destroy star forest 216 217 Collective 218 219 Input Parameter: 220 . sf - address of star forest 221 222 Level: intermediate 223 224 .seealso: PetscSFCreate(), PetscSFReset() 225 @*/ 226 PetscErrorCode PetscSFDestroy(PetscSF *sf) 227 { 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(0); 232 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 233 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 234 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 235 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 236 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 237 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 238 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 243 { 244 PetscInt i, nleaves; 245 PetscMPIInt size; 246 const PetscInt *ilocal; 247 const PetscSFNode *iremote; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 252 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 254 for (i = 0; i < nleaves; i++) { 255 const PetscInt rank = iremote[i].rank; 256 const PetscInt remote = iremote[i].index; 257 const PetscInt leaf = ilocal ? ilocal[i] : i; 258 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 259 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 260 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 261 } 262 PetscFunctionReturn(0); 263 } 264 265 /*@ 266 PetscSFSetUp - set up communication structures 267 268 Collective 269 270 Input Parameter: 271 . 
sf - star forest communication object 272 273 Level: beginner 274 275 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 276 @*/ 277 PetscErrorCode PetscSFSetUp(PetscSF sf) 278 { 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 283 PetscSFCheckGraphSet(sf,1); 284 if (sf->setupcalled) PetscFunctionReturn(0); 285 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 286 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 287 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 288 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 289 #if defined(PETSC_HAVE_CUDA) 290 if (sf->backend == PETSCSF_BACKEND_CUDA) { 291 sf->ops->Malloc = PetscSFMalloc_CUDA; 292 sf->ops->Free = PetscSFFree_CUDA; 293 } 294 #endif 295 #if defined(PETSC_HAVE_HIP) 296 if (sf->backend == PETSCSF_BACKEND_HIP) { 297 sf->ops->Malloc = PetscSFMalloc_HIP; 298 sf->ops->Free = PetscSFFree_HIP; 299 } 300 #endif 301 302 # 303 #if defined(PETSC_HAVE_KOKKOS) 304 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 305 sf->ops->Malloc = PetscSFMalloc_Kokkos; 306 sf->ops->Free = PetscSFFree_Kokkos; 307 } 308 #endif 309 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 310 sf->setupcalled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 /*@ 315 PetscSFSetFromOptions - set PetscSF options using the options database 316 317 Logically Collective 318 319 Input Parameter: 320 . sf - star forest 321 322 Options Database Keys: 323 + -sf_type - implementation type, see PetscSFSetType() 324 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 325 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 326 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 327 If true, this option only works with -use_gpu_aware_mpi 1. 328 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 329 If true, this option only works with -use_gpu_aware_mpi 1. 330 331 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 332 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 333 the only available is kokkos. 334 335 Level: intermediate 336 @*/ 337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 338 { 339 PetscSFType deft; 340 char type[256]; 341 PetscErrorCode ierr; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 346 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 349 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 350 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 357 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 358 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 359 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 360 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 361 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 373 ierr = PetscOptionsEnd();CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 394 PetscValidLogicalCollectiveBool(sf,flg,2); 395 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(0); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for ilocal 412 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for iremote 415 416 Level: intermediate 417 418 Notes: 419 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 420 421 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 422 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 423 needed 424 425 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 426 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 427 428 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 429 @*/ 430 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 431 { 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 436 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 437 if (nleaves > 0) PetscValidPointer(iremote,6); 438 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 439 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 440 441 if (sf->nroots >= 0) { /* Reset only if graph already set */ 442 ierr = PetscSFReset(sf);CHKERRQ(ierr); 443 } 444 445 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 446 447 sf->nroots = nroots; 448 sf->nleaves = nleaves; 449 450 if (nleaves && ilocal) { 451 PetscInt i; 452 PetscInt minleaf = PETSC_MAX_INT; 453 PetscInt maxleaf = PETSC_MIN_INT; 454 int contiguous = 1; 455 for (i=0; i<nleaves; i++) { 456 minleaf = PetscMin(minleaf,ilocal[i]); 457 maxleaf = PetscMax(maxleaf,ilocal[i]); 458 contiguous &= (ilocal[i] == i); 459 } 460 sf->minleaf = minleaf; 461 sf->maxleaf = maxleaf; 462 if (contiguous) { 463 if (localmode == PETSC_OWN_POINTER) { 464 ierr = PetscFree(ilocal);CHKERRQ(ierr); 465 } 466 ilocal = NULL; 467 } 468 } else { 469 sf->minleaf = 0; 470 sf->maxleaf = nleaves - 1; 471 } 472 473 if (ilocal) { 474 switch (localmode) { 475 case PETSC_COPY_VALUES: 476 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 477 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 478 sf->mine = sf->mine_alloc; 479 break; 480 case PETSC_OWN_POINTER: 481 sf->mine_alloc = (PetscInt*)ilocal; 482 sf->mine = sf->mine_alloc; 483 break; 484 case PETSC_USE_POINTER: 485 sf->mine_alloc = NULL; 486 sf->mine = (PetscInt*)ilocal; 487 break; 488 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 489 } 490 } 491 492 switch (remotemode) { 493 case PETSC_COPY_VALUES: 494 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 495 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 496 sf->remote = sf->remote_alloc; 497 break; 498 case PETSC_OWN_POINTER: 499 sf->remote_alloc = (PetscSFNode*)iremote; 500 sf->remote = sf->remote_alloc; 501 break; 502 case PETSC_USE_POINTER: 503 sf->remote_alloc = NULL; 504 sf->remote = (PetscSFNode*)iremote; 505 break; 506 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 507 } 508 509 ierr = 
PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 510 sf->graphset = PETSC_TRUE; 511 PetscFunctionReturn(0); 512 } 513 514 /*@ 515 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 516 517 Collective 518 519 Input Parameters: 520 + sf - The PetscSF 521 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 522 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 523 524 Notes: 525 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 526 n and N are local and global sizes of x respectively. 527 528 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 529 sequential vectors y on all ranks. 530 531 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 532 sequential vector y on rank 0. 533 534 In above cases, entries of x are roots and entries of y are leaves. 535 536 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 537 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 538 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 539 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 540 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 541 542 In this case, roots and leaves are symmetric. 543 544 Level: intermediate 545 @*/ 546 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 547 { 548 MPI_Comm comm; 549 PetscInt n,N,res[2]; 550 PetscMPIInt rank,size; 551 PetscSFType type; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 556 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 557 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 558 if (PetscUnlikely((pattern < PETSCSF_PATTERN_ALLGATHER) || (pattern > PETSCSF_PATTERN_ALLTOALL))) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 559 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 560 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 561 562 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 563 type = PETSCSFALLTOALL; 564 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 565 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 566 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 567 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 568 } else { 569 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 570 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 571 res[0] = n; 572 res[1] = -n; 573 /* Check if n are same over all ranks so that we can optimize it */ 574 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 575 if (res[0] == -res[1]) { /* same n */ 576 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 577 } else { 578 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? 
PETSCSFALLGATHERV : PETSCSFGATHERV; 579 } 580 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 581 } 582 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 583 584 sf->pattern = pattern; 585 sf->mine = NULL; /* Contiguous */ 586 587 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 588 Also set other easy stuff. 589 */ 590 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 591 sf->nleaves = N; 592 sf->nroots = n; 593 sf->nranks = size; 594 sf->minleaf = 0; 595 sf->maxleaf = N - 1; 596 } else if (pattern == PETSCSF_PATTERN_GATHER) { 597 sf->nleaves = rank ? 0 : N; 598 sf->nroots = n; 599 sf->nranks = rank ? 0 : size; 600 sf->minleaf = 0; 601 sf->maxleaf = rank ? -1 : N - 1; 602 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 603 sf->nleaves = size; 604 sf->nroots = size; 605 sf->nranks = size; 606 sf->minleaf = 0; 607 sf->maxleaf = size - 1; 608 } 609 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 610 sf->graphset = PETSC_TRUE; 611 PetscFunctionReturn(0); 612 } 613 614 /*@ 615 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 616 617 Collective 618 619 Input Parameter: 620 . sf - star forest to invert 621 622 Output Parameter: 623 . isf - inverse of sf 624 625 Level: advanced 626 627 Notes: 628 All roots must have degree 1. 629 630 The local space may be a permutation, but cannot be sparse. 631 632 .seealso: PetscSFSetGraph() 633 @*/ 634 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 635 { 636 PetscErrorCode ierr; 637 PetscMPIInt rank; 638 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 639 const PetscInt *ilocal; 640 PetscSFNode *roots,*leaves; 641 642 PetscFunctionBegin; 643 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 644 PetscSFCheckGraphSet(sf,1); 645 PetscValidPointer(isf,2); 646 647 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 648 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 649 650 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 651 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 652 for (i=0; i<maxlocal; i++) { 653 leaves[i].rank = rank; 654 leaves[i].index = i; 655 } 656 for (i=0; i <nroots; i++) { 657 roots[i].rank = -1; 658 roots[i].index = -1; 659 } 660 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 661 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 662 663 /* Check whether our leaves are sparse */ 664 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 665 if (count == nroots) newilocal = NULL; 666 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
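            Only roots that actually received a leaf (rank >= 0) survive: newilocal records their original root indices,
            which become the leaf locations of the inverse SF, and the compacted leading part of roots[] supplies its remote entries.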
*/ 667 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 668 for (i=0,count=0; i<nroots; i++) { 669 if (roots[i].rank >= 0) { 670 newilocal[count] = i; 671 roots[count].rank = roots[i].rank; 672 roots[count].index = roots[i].index; 673 count++; 674 } 675 } 676 } 677 678 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 679 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 680 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 /*@ 685 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 686 687 Collective 688 689 Input Parameters: 690 + sf - communication object to duplicate 691 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 692 693 Output Parameter: 694 . newsf - new communication object 695 696 Level: beginner 697 698 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 699 @*/ 700 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 701 { 702 PetscSFType type; 703 PetscErrorCode ierr; 704 MPI_Datatype dtype=MPIU_SCALAR; 705 706 PetscFunctionBegin; 707 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 708 PetscValidLogicalCollectiveEnum(sf,opt,2); 709 PetscValidPointer(newsf,3); 710 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 711 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 712 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 713 if (opt == PETSCSF_DUPLICATE_GRAPH) { 714 PetscSFCheckGraphSet(sf,1); 715 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 716 PetscInt nroots,nleaves; 717 const PetscInt *ilocal; 718 const PetscSFNode *iremote; 719 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 720 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 721 } else { 722 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 723 } 724 } 725 /* Since oldtype is committed, so is newtype, according to MPI */ 726 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 727 (*newsf)->vscat.bs = sf->vscat.bs; 728 (*newsf)->vscat.unit = dtype; 729 (*newsf)->vscat.to_n = sf->vscat.to_n; 730 (*newsf)->vscat.from_n = sf->vscat.from_n; 731 /* Do not copy lsf. Build it on demand since it is rarely used */ 732 733 #if defined(PETSC_HAVE_DEVICE) 734 (*newsf)->backend = sf->backend; 735 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 736 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 737 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 738 #endif 739 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 740 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 741 PetscFunctionReturn(0); 742 } 743 744 /*@C 745 PetscSFGetGraph - Get the graph specifying a parallel star forest 746 747 Not Collective 748 749 Input Parameter: 750 . sf - star forest 751 752 Output Parameters: 753 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 754 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 755 . 
ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 756 - iremote - remote locations of root vertices for each leaf on the current process 757 758 Notes: 759 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 760 761 Fortran Notes: 762 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 763 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 764 765 To check for a NULL ilocal use 766 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 767 768 Level: intermediate 769 770 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 771 @*/ 772 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 773 { 774 PetscErrorCode ierr; 775 776 PetscFunctionBegin; 777 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 778 if (sf->ops->GetGraph) { 779 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 780 } else { 781 if (nroots) *nroots = sf->nroots; 782 if (nleaves) *nleaves = sf->nleaves; 783 if (ilocal) *ilocal = sf->mine; 784 if (iremote) *iremote = sf->remote; 785 } 786 PetscFunctionReturn(0); 787 } 788 789 /*@ 790 PetscSFGetLeafRange - Get the active leaf ranges 791 792 Not Collective 793 794 Input Parameter: 795 . sf - star forest 796 797 Output Parameters: 798 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 799 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 800 801 Level: developer 802 803 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 804 @*/ 805 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 806 { 807 PetscFunctionBegin; 808 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 809 PetscSFCheckGraphSet(sf,1); 810 if (minleaf) *minleaf = sf->minleaf; 811 if (maxleaf) *maxleaf = sf->maxleaf; 812 PetscFunctionReturn(0); 813 } 814 815 /*@C 816 PetscSFViewFromOptions - View from Options 817 818 Collective on PetscSF 819 820 Input Parameters: 821 + A - the star forest 822 . 
obj - Optional object 823 - name - command line option 824 825 Level: intermediate 826 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 827 @*/ 828 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 829 { 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 834 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 /*@C 839 PetscSFView - view a star forest 840 841 Collective 842 843 Input Parameters: 844 + sf - star forest 845 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 846 847 Level: beginner 848 849 .seealso: PetscSFCreate(), PetscSFSetGraph() 850 @*/ 851 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 852 { 853 PetscErrorCode ierr; 854 PetscBool iascii; 855 PetscViewerFormat format; 856 857 PetscFunctionBegin; 858 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 859 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 860 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 861 PetscCheckSameComm(sf,1,viewer,2); 862 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 863 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 864 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 865 PetscMPIInt rank; 866 PetscInt ii,i,j; 867 868 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 870 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 871 if (!sf->graphset) { 872 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 873 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 877 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 879 for (i=0; i<sf->nleaves; i++) { 880 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? 
sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 881 } 882 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 PetscMPIInt *tmpranks,*perm; 886 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 887 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 888 for (i=0; i<sf->nranks; i++) perm[i] = i; 889 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 891 for (ii=0; ii<sf->nranks; ii++) { 892 i = perm[ii]; 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 894 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 896 } 897 } 898 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 899 } 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 902 } 903 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 904 } 905 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 906 PetscFunctionReturn(0); 907 } 908 909 /*@C 910 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 911 912 Not Collective 913 914 Input Parameter: 915 . sf - star forest 916 917 Output Parameters: 918 + nranks - number of ranks referenced by local part 919 . ranks - array of ranks 920 . roffset - offset in rmine/rremote for each rank (length nranks+1) 921 . rmine - concatenated array holding local indices referencing each remote rank 922 - rremote - concatenated array holding remote indices referenced for each remote rank 923 924 Level: developer 925 926 .seealso: PetscSFGetLeafRanks() 927 @*/ 928 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 929 { 930 PetscErrorCode ierr; 931 932 PetscFunctionBegin; 933 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 934 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 935 if (sf->ops->GetRootRanks) { 936 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 937 } else { 938 /* The generic implementation */ 939 if (nranks) *nranks = sf->nranks; 940 if (ranks) *ranks = sf->ranks; 941 if (roffset) *roffset = sf->roffset; 942 if (rmine) *rmine = sf->rmine; 943 if (rremote) *rremote = sf->rremote; 944 } 945 PetscFunctionReturn(0); 946 } 947 948 /*@C 949 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 950 951 Not Collective 952 953 Input Parameter: 954 . sf - star forest 955 956 Output Parameters: 957 + niranks - number of leaf ranks referencing roots on this process 958 . iranks - array of ranks 959 . 
ioffset - offset in irootloc for each rank (length niranks+1) 960 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 961 962 Level: developer 963 964 .seealso: PetscSFGetRootRanks() 965 @*/ 966 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 967 { 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 972 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 973 if (sf->ops->GetLeafRanks) { 974 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 975 } else { 976 PetscSFType type; 977 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 978 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 984 PetscInt i; 985 for (i=0; i<n; i++) { 986 if (needle == list[i]) return PETSC_TRUE; 987 } 988 return PETSC_FALSE; 989 } 990 991 /*@C 992 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 993 994 Collective 995 996 Input Parameters: 997 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 998 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 999 1000 Level: developer 1001 1002 .seealso: PetscSFGetRootRanks() 1003 @*/ 1004 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 1005 { 1006 PetscErrorCode ierr; 1007 PetscTable table; 1008 PetscTablePosition pos; 1009 PetscMPIInt size,groupsize,*groupranks; 1010 PetscInt *rcount,*ranks; 1011 PetscInt i, irank = -1,orank = -1; 1012 1013 PetscFunctionBegin; 1014 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1015 PetscSFCheckGraphSet(sf,1); 1016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 1017 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 1018 for (i=0; i<sf->nleaves; i++) { 1019 /* Log 1-based rank */ 1020 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 1021 } 1022 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 1023 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 1024 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 1025 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 1026 for (i=0; i<sf->nranks; i++) { 1027 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 1028 ranks[i]--; /* Convert back to 0-based */ 1029 } 1030 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 1031 1032 /* We expect that dgroup is reliably "small" while nranks could be large */ 1033 { 1034 MPI_Group group = MPI_GROUP_NULL; 1035 PetscMPIInt *dgroupranks; 1036 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1037 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr); 1038 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 1039 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 1040 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1041 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);} 1042 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1043 ierr = 
PetscFree(dgroupranks);CHKERRQ(ierr); 1044 } 1045 1046 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1047 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1048 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1049 if (InList(ranks[i],groupsize,groupranks)) break; 1050 } 1051 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1052 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1053 } 1054 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1055 PetscInt tmprank,tmpcount; 1056 1057 tmprank = ranks[i]; 1058 tmpcount = rcount[i]; 1059 ranks[i] = ranks[sf->ndranks]; 1060 rcount[i] = rcount[sf->ndranks]; 1061 ranks[sf->ndranks] = tmprank; 1062 rcount[sf->ndranks] = tmpcount; 1063 sf->ndranks++; 1064 } 1065 } 1066 ierr = PetscFree(groupranks);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 1068 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 1069 sf->roffset[0] = 0; 1070 for (i=0; i<sf->nranks; i++) { 1071 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 1072 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1073 rcount[i] = 0; 1074 } 1075 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1076 /* short circuit */ 1077 if (orank != sf->remote[i].rank) { 1078 /* Search for index of iremote[i].rank in sf->ranks */ 1079 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 1080 if (irank < 0) { 1081 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 1082 if (irank >= 0) irank += sf->ndranks; 1083 } 1084 orank = sf->remote[i].rank; 1085 } 1086 if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank); 1087 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1088 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1089 rcount[irank]++; 1090 } 1091 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 /*@C 1096 PetscSFGetGroups - gets incoming and outgoing process groups 1097 1098 Collective 1099 1100 Input Parameter: 1101 . 
sf - star forest 1102 1103 Output Parameters: 1104 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1105 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1106 1107 Level: developer 1108 1109 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1110 @*/ 1111 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1112 { 1113 PetscErrorCode ierr; 1114 MPI_Group group = MPI_GROUP_NULL; 1115 1116 PetscFunctionBegin; 1117 if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1118 if (sf->ingroup == MPI_GROUP_NULL) { 1119 PetscInt i; 1120 const PetscInt *indegree; 1121 PetscMPIInt rank,*outranks,*inranks; 1122 PetscSFNode *remote; 1123 PetscSF bgcount; 1124 1125 /* Compute the number of incoming ranks */ 1126 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 1127 for (i=0; i<sf->nranks; i++) { 1128 remote[i].rank = sf->ranks[i]; 1129 remote[i].index = 0; 1130 } 1131 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1132 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1133 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1134 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1135 /* Enumerate the incoming ranks */ 1136 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1137 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1138 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1139 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1140 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1141 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1142 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr); 1143 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1144 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1145 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1146 } 1147 *incoming = sf->ingroup; 1148 1149 if (sf->outgroup == MPI_GROUP_NULL) { 1150 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1151 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr); 1152 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1153 } 1154 *outgoing = sf->outgroup; 1155 PetscFunctionReturn(0); 1156 } 1157 1158 /*@ 1159 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1160 1161 Collective 1162 1163 Input Parameter: 1164 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1165 1166 Output Parameter: 1167 . multi - star forest with split roots, such that each root has degree exactly 1 1168 1169 Level: developer 1170 1171 Notes: 1172 1173 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1174 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1175 edge, it is a candidate for future optimization that might involve its removal. 
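
   For illustration, a minimal sketch of retrieving the multi-SF (most codes should call PetscSFGatherBegin()/PetscSFScatterBegin() instead):
.vb
   PetscSF multi;
   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); /* multi is owned by sf; do not destroy it */
   /* each root of multi has degree exactly 1, one per (root,leaf) edge of sf */
.ve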
1176 1177 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1178 @*/ 1179 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1180 { 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1185 PetscValidPointer(multi,2); 1186 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1187 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1188 *multi = sf->multi; 1189 sf->multi->multi = sf->multi; 1190 PetscFunctionReturn(0); 1191 } 1192 if (!sf->multi) { 1193 const PetscInt *indegree; 1194 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1195 PetscSFNode *remote; 1196 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1197 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1198 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1199 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1200 inoffset[0] = 0; 1201 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1202 for (i=0; i<maxlocal; i++) outones[i] = 1; 1203 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1204 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1205 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1206 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1207 for (i=0; i<sf->nroots; i++) { 1208 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1209 } 1210 } 1211 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1212 for (i=0; i<sf->nleaves; i++) { 1213 remote[i].rank = sf->remote[i].rank; 1214 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1215 } 1216 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1217 sf->multi->multi = sf->multi; 1218 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1219 if (sf->rankorder) { /* Sort the ranks */ 1220 PetscMPIInt rank; 1221 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1222 PetscSFNode *newremote; 1223 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1224 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1225 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1226 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1227 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1228 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1229 /* Sort the incoming ranks at each vertex, build the inverse map */ 1230 for (i=0; i<sf->nroots; i++) { 1231 PetscInt j; 1232 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1233 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1234 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1235 } 1236 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1237 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1238 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1239 for (i=0; i<sf->nleaves; i++) { 1240 newremote[i].rank = sf->remote[i].rank; 1241 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1242 } 1243 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1244 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1245 } 1246 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1247 } 1248 *multi = sf->multi; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /*@C 1253 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1254 1255 Collective 1256 1257 Input Parameters: 1258 + sf - original star forest 1259 . nselected - number of selected roots on this process 1260 - selected - indices of the selected roots on this process 1261 1262 Output Parameter: 1263 . esf - new star forest 1264 1265 Level: advanced 1266 1267 Note: 1268 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1269 be done by calling PetscSFGetGraph(). 
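
   For example, one might restrict communication to a subset of roots as in this sketch (nselected/selected are the caller's arrays, as documented above):
.vb
   PetscSF esf;
   ierr = PetscSFCreateEmbeddedRootSF(sf,nselected,selected,&esf);CHKERRQ(ierr);
   /* use esf in Bcast/Reduce calls; indices are unchanged, only edges to unselected roots are gone */
   ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
.ve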
1270 1271 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1272 @*/ 1273 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1274 { 1275 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1276 const PetscInt *ilocal; 1277 signed char *rootdata,*leafdata,*leafmem; 1278 const PetscSFNode *iremote; 1279 PetscSFNode *new_iremote; 1280 MPI_Comm comm; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1285 PetscSFCheckGraphSet(sf,1); 1286 if (nselected) PetscValidPointer(selected,3); 1287 PetscValidPointer(esf,4); 1288 1289 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1290 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1291 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1292 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1293 1294 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1295 PetscBool dups; 1296 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1297 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1298 for (i=0; i<nselected; i++) 1299 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1300 } 1301 1302 if (sf->ops->CreateEmbeddedRootSF) { 1303 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1304 } else { 1305 /* A generic version of creating embedded sf */ 1306 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1307 maxlocal = maxleaf - minleaf + 1; 1308 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1309 leafdata = leafmem - minleaf; 1310 /* Tag selected roots and bcast to leaves */ 1311 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1312 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1313 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1314 1315 /* Build esf with leaves that are still connected */ 1316 esf_nleaves = 0; 1317 for (i=0; i<nleaves; i++) { 1318 j = ilocal ? ilocal[i] : i; 1319 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1320 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1321 */ 1322 esf_nleaves += (leafdata[j] ? 1 : 0); 1323 } 1324 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1325 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1326 for (i=n=0; i<nleaves; i++) { 1327 j = ilocal ? ilocal[i] : i; 1328 if (leafdata[j]) { 1329 new_ilocal[n] = j; 1330 new_iremote[n].rank = iremote[i].rank; 1331 new_iremote[n].index = iremote[i].index; 1332 ++n; 1333 } 1334 } 1335 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1336 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1337 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1338 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1339 } 1340 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 /*@C 1345 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1346 1347 Collective 1348 1349 Input Parameters: 1350 + sf - original star forest 1351 . 
nselected - number of selected leaves on this process
-  selected - indices of the selected leaves on this process

   Output Parameter:
.  newsf - new star forest

   Level: advanced

.seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Uniq selected[] and put results in leaves[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

   Notes:
   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, use
   PetscSFBcastWithMemTypeBegin() instead.
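
   For example, a plain broadcast of root values to leaves looks like this sketch (rootdata/leafdata are caller-owned arrays sized for the local roots/leaves):
.vb
   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
   /* other work can overlap the communication here */
   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
.ve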
1423 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1424 @*/ 1425 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1426 { 1427 PetscErrorCode ierr; 1428 PetscMemType rootmtype,leafmtype; 1429 1430 PetscFunctionBegin; 1431 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1432 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1433 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1434 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1435 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1436 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1437 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1438 PetscFunctionReturn(0); 1439 } 1440 1441 /*@C 1442 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1443 1444 Collective on PetscSF 1445 1446 Input Parameters: 1447 + sf - star forest on which to communicate 1448 . unit - data type associated with each node 1449 . rootmtype - memory type of rootdata 1450 . rootdata - buffer to broadcast 1451 . leafmtype - memory type of leafdata 1452 - op - operation to use for reduction 1453 1454 Output Parameter: 1455 . leafdata - buffer to be reduced with values from each leaf's respective root 1456 1457 Level: intermediate 1458 1459 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1460 @*/ 1461 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1462 { 1463 PetscErrorCode ierr; 1464 1465 PetscFunctionBegin; 1466 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1467 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1468 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1469 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1470 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1471 PetscFunctionReturn(0); 1472 } 1473 1474 /*@C 1475 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1476 1477 Collective 1478 1479 Input Parameters: 1480 + sf - star forest 1481 . unit - data type 1482 . rootdata - buffer to broadcast 1483 - op - operation to use for reduction 1484 1485 Output Parameter: 1486 . leafdata - buffer to be reduced with values from each leaf's respective root 1487 1488 Level: intermediate 1489 1490 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1491 @*/ 1492 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1493 { 1494 PetscErrorCode ierr; 1495 1496 PetscFunctionBegin; 1497 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1498 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1499 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1500 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1501 PetscFunctionReturn(0); 1502 } 1503 1504 /*@C 1505 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1506 1507 Collective 1508 1509 Input Parameters: 1510 + sf - star forest 1511 . unit - data type 1512 . 
leafdata - values to reduce 1513 - op - reduction operation 1514 1515 Output Parameter: 1516 . rootdata - result of reduction of values from all leaves of each root 1517 1518 Level: intermediate 1519 1520 Notes: 1521 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1522 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should 1523 use PetscSFReduceWithMemTypeBegin() instead. 1524 1525 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1526 @*/ 1527 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1528 { 1529 PetscErrorCode ierr; 1530 PetscMemType rootmtype,leafmtype; 1531 1532 PetscFunctionBegin; 1533 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1534 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1535 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1536 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1537 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1538 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1539 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1540 PetscFunctionReturn(0); 1541 } 1542 1543 /*@C 1544 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1545 1546 Collective 1547 1548 Input Parameters: 1549 + sf - star forest 1550 . unit - data type 1551 . leafmtype - memory type of leafdata 1552 . leafdata - values to reduce 1553 . rootmtype - memory type of rootdata 1554 - op - reduction operation 1555 1556 Output Parameter: 1557 . rootdata - result of reduction of values from all leaves of each root 1558 1559 Level: intermediate 1560 1561 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1562 @*/ 1563 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1564 { 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1569 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1570 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1571 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1572 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1573 PetscFunctionReturn(0); 1574 } 1575 1576 /*@C 1577 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1578 1579 Collective 1580 1581 Input Parameters: 1582 + sf - star forest 1583 . unit - data type 1584 . leafdata - values to reduce 1585 - op - reduction operation 1586 1587 Output Parameter: 1588 .
rootdata - result of reduction of values from all leaves of each root 1589 1590 Level: intermediate 1591 1592 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1593 @*/ 1594 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1595 { 1596 PetscErrorCode ierr; 1597 1598 PetscFunctionBegin; 1599 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1600 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 1601 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1602 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /*@C 1607 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1608 1609 Collective 1610 1611 Input Parameters: 1612 + sf - star forest 1613 . unit - data type 1614 . leafdata - leaf values to use in reduction 1615 - op - operation to use for reduction 1616 1617 Output Parameters: 1618 + rootdata - root values to be updated, input state is seen by first process to perform an update 1619 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1620 1621 Level: advanced 1622 1623 Note: 1624 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1625 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1626 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1627 integers. 1628 1629 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1630 @*/ 1631 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1632 { 1633 PetscErrorCode ierr; 1634 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1635 1636 PetscFunctionBegin; 1637 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1638 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1639 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1640 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1641 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1642 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1643 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1644 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1645 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 1649 /*@C 1650 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1651 1652 Collective 1653 1654 Input Parameters: 1655 + sf - star forest 1656 . unit - data type 1657 . 
leafdata - leaf values to use in reduction 1658 - op - operation to use for reduction 1659 1660 Output Parameters: 1661 + rootdata - root values to be updated, input state is seen by first process to perform an update 1662 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1663 1664 Level: advanced 1665 1666 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1667 @*/ 1668 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1669 { 1670 PetscErrorCode ierr; 1671 1672 PetscFunctionBegin; 1673 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1674 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1675 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1676 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 /*@C 1681 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1682 1683 Collective 1684 1685 Input Parameter: 1686 . sf - star forest 1687 1688 Output Parameter: 1689 . degree - degree of each root vertex 1690 1691 Level: advanced 1692 1693 Notes: 1694 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1695 1696 .seealso: PetscSFGatherBegin() 1697 @*/ 1698 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1699 { 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1704 PetscSFCheckGraphSet(sf,1); 1705 PetscValidPointer(degree,2); 1706 if (!sf->degreeknown) { 1707 PetscInt i, nroots = sf->nroots, maxlocal; 1708 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1709 maxlocal = sf->maxleaf-sf->minleaf+1; 1710 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1711 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1712 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1713 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1714 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1715 } 1716 *degree = NULL; 1717 PetscFunctionReturn(0); 1718 } 1719 1720 /*@C 1721 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1722 1723 Collective 1724 1725 Input Parameter: 1726 . sf - star forest 1727 1728 Output Parameter: 1729 . degree - degree of each root vertex 1730 1731 Level: developer 1732 1733 Notes: 1734 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
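   Example Usage (an illustrative sketch; the begin/end pair is assumed to be called on an sf whose graph has already been set):
.vb
   const PetscInt *degree;
   ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
   /* degree[i] is now the number of leaves connected to root i; the array is owned by sf, so do not PetscFree() it */
.ve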
1735 1736 .seealso: PetscSFComputeDegreeBegin() 1737 @*/ 1738 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1739 { 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1744 PetscSFCheckGraphSet(sf,1); 1745 PetscValidPointer(degree,2); 1746 if (!sf->degreeknown) { 1747 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1748 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1749 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1750 sf->degreeknown = PETSC_TRUE; 1751 } 1752 *degree = sf->degree; 1753 PetscFunctionReturn(0); 1754 } 1755 1756 /*@C 1757 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1758 Each multi-root is assigned the index of the corresponding original root. 1759 1760 Collective 1761 1762 Input Parameters: 1763 + sf - star forest 1764 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1765 1766 Output Parameters: 1767 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1768 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1769 1770 Level: developer 1771 1772 Notes: 1773 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1774 1775 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1776 @*/ 1777 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1778 { 1779 PetscSF msf; 1780 PetscInt i, j, k, nroots, nmroots; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1785 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1786 if (nroots) PetscValidIntPointer(degree,2); 1787 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1788 PetscValidPointer(multiRootsOrigNumbering,4); 1789 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1790 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1791 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1792 for (i=0,j=0,k=0; i<nroots; i++) { 1793 if (!degree[i]) continue; 1794 for (j=0; j<degree[i]; j++,k++) { 1795 (*multiRootsOrigNumbering)[k] = i; 1796 } 1797 } 1798 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check failed: number of multi-roots does not match the sum of root degrees"); 1799 if (nMultiRoots) *nMultiRoots = nmroots; 1800 PetscFunctionReturn(0); 1801 } 1802 1803 /*@C 1804 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1805 1806 Collective 1807 1808 Input Parameters: 1809 + sf - star forest 1810 . unit - data type 1811 - leafdata - leaf data to gather to roots 1812 1813 Output Parameter: 1814 .
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1815 1816 Level: intermediate 1817 1818 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1819 @*/ 1820 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1821 { 1822 PetscErrorCode ierr; 1823 PetscSF multi = NULL; 1824 1825 PetscFunctionBegin; 1826 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1827 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1828 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1829 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 1833 /*@C 1834 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1835 1836 Collective 1837 1838 Input Parameters: 1839 + sf - star forest 1840 . unit - data type 1841 - leafdata - leaf data to gather to roots 1842 1843 Output Parameter: 1844 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1845 1846 Level: intermediate 1847 1848 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1849 @*/ 1850 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1851 { 1852 PetscErrorCode ierr; 1853 PetscSF multi = NULL; 1854 1855 PetscFunctionBegin; 1856 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1857 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1858 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 /*@C 1863 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1864 1865 Collective 1866 1867 Input Parameters: 1868 + sf - star forest 1869 . unit - data type 1870 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1871 1872 Output Parameter: 1873 . leafdata - leaf data to be updated with personal data from each respective root 1874 1875 Level: intermediate 1876 1877 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd() 1878 @*/ 1879 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1880 { 1881 PetscErrorCode ierr; 1882 PetscSF multi = NULL; 1883 1884 PetscFunctionBegin; 1885 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1886 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1887 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1888 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 /*@C 1893 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1894 1895 Collective 1896 1897 Input Parameters: 1898 + sf - star forest 1899 . unit - data type 1900 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1901 1902 Output Parameter: 1903 .
leafdata - leaf data to be updated with personal data from each respective root 1904 1905 Level: intermediate 1906 1907 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin() 1908 @*/ 1909 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1910 { 1911 PetscErrorCode ierr; 1912 PetscSF multi = NULL; 1913 1914 PetscFunctionBegin; 1915 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1916 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1917 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1922 { 1923 PetscInt i, n, nleaves; 1924 const PetscInt *ilocal = NULL; 1925 PetscHSetI seen; 1926 PetscErrorCode ierr; 1927 1928 PetscFunctionBegin; 1929 if (PetscDefined(USE_DEBUG)) { 1930 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1931 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1932 for (i = 0; i < nleaves; i++) { 1933 const PetscInt leaf = ilocal ? ilocal[i] : i; 1934 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1935 } 1936 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1937 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1938 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 /*@ 1944 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1945 1946 Input Parameters: 1947 + sfA - The first PetscSF 1948 - sfB - The second PetscSF 1949 1950 Output Parameters: 1951 . sfBA - The composite SF 1952 1953 Level: developer 1954 1955 Notes: 1956 Currently, the two SFs must be defined on congruent communicators and they must be true star 1957 forests, i.e., the same leaf is not connected with different roots. 1958 1959 sfA's leaf space and sfB's root space may partially overlap. The composition builds 1960 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1961 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1962 Bcast on sfA, then a Bcast on sfB, on connected nodes.
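   Example Usage (an illustrative sketch; sfA and sfB are assumed to be existing SFs on congruent communicators with their graphs already set):
.vb
   PetscSF sfBA;
   ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr); /* roots of sfBA live in sfA's root space, leaves in sfB's leaf space */
   /* ... communicate with sfBA as with any other SF ... */
   ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
.ve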
1963 1964 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1965 @*/ 1966 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1967 { 1968 PetscErrorCode ierr; 1969 const PetscSFNode *remotePointsA,*remotePointsB; 1970 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1971 const PetscInt *localPointsA,*localPointsB; 1972 PetscInt *localPointsBA; 1973 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1974 PetscBool denseB; 1975 1976 PetscFunctionBegin; 1977 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1978 PetscSFCheckGraphSet(sfA,1); 1979 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1980 PetscSFCheckGraphSet(sfB,2); 1981 PetscCheckSameComm(sfA,1,sfB,2); 1982 PetscValidPointer(sfBA,3); 1983 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1984 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1985 1986 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1987 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1988 if (localPointsA) { 1989 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 1990 for (i=0; i<numRootsB; i++) { 1991 reorderedRemotePointsA[i].rank = -1; 1992 reorderedRemotePointsA[i].index = -1; 1993 } 1994 for (i=0; i<numLeavesA; i++) { 1995 if (localPointsA[i] >= numRootsB) continue; 1996 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1997 } 1998 remotePointsA = reorderedRemotePointsA; 1999 } 2000 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 2001 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 2002 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2003 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2004 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2005 2006 denseB = (PetscBool)!localPointsB; 2007 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2008 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 2009 else numLeavesBA++; 2010 } 2011 if (denseB) { 2012 localPointsBA = NULL; 2013 remotePointsBA = leafdataB; 2014 } else { 2015 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 2016 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 2017 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2018 const PetscInt l = localPointsB ? localPointsB[i] : i; 2019 2020 if (leafdataB[l-minleaf].rank == -1) continue; 2021 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 2022 localPointsBA[numLeavesBA] = l; 2023 numLeavesBA++; 2024 } 2025 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2026 } 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2028 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2029 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 /*@ 2034 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 2035 2036 Input Parameters: 2037 + sfA - The first PetscSF 2038 - sfB - The second PetscSF 2039 2040 Output Parameters: 2041 . sfBA - The composite SF. 2042 2043 Level: developer 2044 2045 Notes: 2046 Currently, the two SFs must be defined on congruent communicators and they must be true star 2047 forests, i.e. 
the same leaf is not connected with different roots. Moreover, all roots of the 2048 second SF must have a degree of 1, i.e., no root has more than one leaf connected. 2049 2050 sfA's leaf space and sfB's leaf space may partially overlap. The composition builds 2051 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 2052 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 2053 a Reduce on sfB, on connected roots. 2054 2055 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 2056 @*/ 2057 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 2058 { 2059 PetscErrorCode ierr; 2060 const PetscSFNode *remotePointsA,*remotePointsB; 2061 PetscSFNode *remotePointsBA; 2062 const PetscInt *localPointsA,*localPointsB; 2063 PetscSFNode *reorderedRemotePointsA = NULL; 2064 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 2065 MPI_Op op; 2066 #if defined(PETSC_USE_64BIT_INDICES) 2067 PetscBool iswin; 2068 #endif 2069 2070 PetscFunctionBegin; 2071 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2072 PetscSFCheckGraphSet(sfA,1); 2073 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2074 PetscSFCheckGraphSet(sfB,2); 2075 PetscCheckSameComm(sfA,1,sfB,2); 2076 PetscValidPointer(sfBA,3); 2077 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2078 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2079 2080 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 2081 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 2082 2083 /* TODO: Check that roots of sfB have degree of 1 */ 2084 /* Once that check is implemented, we can replace the MPI_MAXLOC 2085 with MPI_REPLACE; when the root condition holds, MPI_MAXLOC and MPI_REPLACE have the same effect. 2086 We use MPI_MAXLOC only to have a deterministic output from this routine if 2087 the root condition is not met.
2088 */ 2089 op = MPI_MAXLOC; 2090 #if defined(PETSC_USE_64BIT_INDICES) 2091 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2092 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 2093 if (iswin) op = MPI_REPLACE; 2094 #endif 2095 2096 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 2097 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 2098 for (i=0; i<maxleaf - minleaf + 1; i++) { 2099 reorderedRemotePointsA[i].rank = -1; 2100 reorderedRemotePointsA[i].index = -1; 2101 } 2102 if (localPointsA) { 2103 for (i=0; i<numLeavesA; i++) { 2104 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2105 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2106 } 2107 } else { 2108 for (i=0; i<numLeavesA; i++) { 2109 if (i > maxleaf || i < minleaf) continue; 2110 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2111 } 2112 } 2113 2114 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 2115 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 2116 for (i=0; i<numRootsB; i++) { 2117 remotePointsBA[i].rank = -1; 2118 remotePointsBA[i].index = -1; 2119 } 2120 2121 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2122 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2123 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2124 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 2125 if (remotePointsBA[i].rank == -1) continue; 2126 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2127 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2128 localPointsBA[numLeavesBA] = i; 2129 numLeavesBA++; 2130 } 2131 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2132 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2133 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 /* 2138 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2139 2140 Input Parameters: 2141 . sf - The global PetscSF 2142 2143 Output Parameters: 2144 . 
out - The local PetscSF 2145 */ 2146 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2147 { 2148 MPI_Comm comm; 2149 PetscMPIInt myrank; 2150 const PetscInt *ilocal; 2151 const PetscSFNode *iremote; 2152 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2153 PetscSFNode *liremote; 2154 PetscSF lsf; 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2159 if (sf->ops->CreateLocalSF) { 2160 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2161 } else { 2162 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2163 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2164 ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr); 2165 2166 /* Find out local edges and build a local SF */ 2167 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2168 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2169 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2170 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2171 2172 for (i=j=0; i<nleaves; i++) { 2173 if (iremote[i].rank == (PetscInt)myrank) { 2174 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2175 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2176 liremote[j].index = iremote[i].index; 2177 j++; 2178 } 2179 } 2180 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2181 ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 2182 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2183 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2184 *out = lsf; 2185 } 2186 PetscFunctionReturn(0); 2187 } 2188 2189 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2190 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2191 { 2192 PetscErrorCode ierr; 2193 PetscMemType rootmtype,leafmtype; 2194 2195 PetscFunctionBegin; 2196 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2197 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2198 ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2199 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2200 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2201 if (sf->ops->BcastToZero) { 2202 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2203 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2204 ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208
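/*
   Example (an illustrative sketch for internal callers, not part of the public API): making root values
   visible only on the leaves owned by rank 0 with the helper above. The sf and the rootdata/leafdata
   buffers are placeholders assumed to be created and sized (nroots and nleaves entries) by the caller.

     ierr = PetscSFBcastToZero_Private(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
*/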