1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h> 595fce210SBarry Smith 67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 77fd2d3dbSJunchao Zhang #include <cuda_runtime.h> 8715b587bSJunchao Zhang #include <petscdevice_cuda.h> 97fd2d3dbSJunchao Zhang #endif 107fd2d3dbSJunchao Zhang 117fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 127fd2d3dbSJunchao Zhang #include <hip/hip_runtime.h> 137fd2d3dbSJunchao Zhang #endif 147fd2d3dbSJunchao Zhang 152abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER) 164bf303faSJacob Faibussowitsch extern void PetscSFCheckGraphSet(PetscSF, int); 172abc8c78SJacob Faibussowitsch #else 1895fce210SBarry Smith #if defined(PETSC_USE_DEBUG) 19a8f51744SPierre Jolivet #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 2095fce210SBarry Smith #else 219371c9d4SSatish Balay #define PetscSFCheckGraphSet(sf, arg) \ 229371c9d4SSatish Balay do { \ 239371c9d4SSatish Balay } while (0) 2495fce210SBarry Smith #endif 252abc8c78SJacob Faibussowitsch #endif 2695fce210SBarry Smith 274c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 281f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 2995fce210SBarry Smith 308af6ec1cSBarry Smith /*@ 3195fce210SBarry Smith PetscSFCreate - create a star forest communication context 3295fce210SBarry Smith 33d083f849SBarry Smith Collective 3495fce210SBarry Smith 354165533cSJose E. 
Roman Input Parameter: 3695fce210SBarry Smith . comm - communicator on which the star forest will operate 3795fce210SBarry Smith 384165533cSJose E. Roman Output Parameter: 3995fce210SBarry Smith . sf - new star forest context 4095fce210SBarry Smith 4120662ed9SBarry Smith Options Database Key: 426677b1c1SJunchao Zhang + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 436677b1c1SJunchao Zhang . -sf_type window - Use MPI-3 one-sided window for communication 446677b1c1SJunchao Zhang . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 456677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46dd5b3ca6SJunchao Zhang 4795fce210SBarry Smith Level: intermediate 4895fce210SBarry Smith 49cab54364SBarry Smith Note: 50cab54364SBarry Smith When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51cab54364SBarry Smith `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 5220662ed9SBarry Smith `SF`s are optimized and they have better performance than the general `SF`s. 
53dd5b3ca6SJunchao Zhang 5438b5cf2dSJacob Faibussowitsch .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 5595fce210SBarry Smith @*/ 56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57d71ae5a4SJacob Faibussowitsch { 5895fce210SBarry Smith PetscSF b; 5995fce210SBarry Smith 6095fce210SBarry Smith PetscFunctionBegin; 614f572ea9SToby Isaac PetscAssertPointer(sf, 2); 629566063dSJacob Faibussowitsch PetscCall(PetscSFInitializePackage()); 6395fce210SBarry Smith 649566063dSJacob Faibussowitsch PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 6595fce210SBarry Smith b->nroots = -1; 6695fce210SBarry Smith b->nleaves = -1; 6729046d53SLisandro Dalcin b->minleaf = PETSC_MAX_INT; 6829046d53SLisandro Dalcin b->maxleaf = PETSC_MIN_INT; 6995fce210SBarry Smith b->nranks = -1; 7095fce210SBarry Smith b->rankorder = PETSC_TRUE; 7195fce210SBarry Smith b->ingroup = MPI_GROUP_NULL; 7295fce210SBarry Smith b->outgroup = MPI_GROUP_NULL; 7395fce210SBarry Smith b->graphset = PETSC_FALSE; 7420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 7520c24465SJunchao Zhang b->use_gpu_aware_mpi = use_gpu_aware_mpi; 7620c24465SJunchao Zhang b->use_stream_aware_mpi = PETSC_FALSE; 7771438e86SJunchao Zhang b->unknown_input_stream = PETSC_FALSE; 7827f636e8SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 7920c24465SJunchao Zhang b->backend = PETSCSF_BACKEND_KOKKOS; 8027f636e8SJunchao Zhang #elif defined(PETSC_HAVE_CUDA) 8127f636e8SJunchao Zhang b->backend = PETSCSF_BACKEND_CUDA; 8259af0bd3SScott Kruger #elif defined(PETSC_HAVE_HIP) 8359af0bd3SScott Kruger b->backend = PETSCSF_BACKEND_HIP; 8420c24465SJunchao Zhang #endif 8571438e86SJunchao Zhang 8671438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 8771438e86SJunchao Zhang b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 8871438e86SJunchao 
Zhang b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 899566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 909566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 9171438e86SJunchao Zhang #endif 9220c24465SJunchao Zhang #endif 9360c22052SBarry Smith b->vscat.from_n = -1; 9460c22052SBarry Smith b->vscat.to_n = -1; 9560c22052SBarry Smith b->vscat.unit = MPIU_SCALAR; 9695fce210SBarry Smith *sf = b; 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9895fce210SBarry Smith } 9995fce210SBarry Smith 10029046d53SLisandro Dalcin /*@ 10195fce210SBarry Smith PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 10295fce210SBarry Smith 10395fce210SBarry Smith Collective 10495fce210SBarry Smith 1054165533cSJose E. Roman Input Parameter: 10695fce210SBarry Smith . sf - star forest 10795fce210SBarry Smith 10895fce210SBarry Smith Level: advanced 10995fce210SBarry Smith 110cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 11195fce210SBarry Smith @*/ 112d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf) 113d71ae5a4SJacob Faibussowitsch { 11495fce210SBarry Smith PetscFunctionBegin; 11595fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Reset); 1170dd791a8SStefano Zampini PetscCall(PetscSFDestroy(&sf->rankssf)); 1180dd791a8SStefano Zampini 11929046d53SLisandro Dalcin sf->nroots = -1; 12029046d53SLisandro Dalcin sf->nleaves = -1; 12129046d53SLisandro Dalcin sf->minleaf = PETSC_MAX_INT; 12229046d53SLisandro Dalcin sf->maxleaf = PETSC_MIN_INT; 12395fce210SBarry Smith sf->mine = NULL; 12495fce210SBarry Smith sf->remote = NULL; 12529046d53SLisandro Dalcin sf->graphset = PETSC_FALSE; 1269566063dSJacob Faibussowitsch 
PetscCall(PetscFree(sf->mine_alloc)); 1279566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->remote_alloc)); 12821c688dcSJed Brown sf->nranks = -1; 1299566063dSJacob Faibussowitsch PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 13029046d53SLisandro Dalcin sf->degreeknown = PETSC_FALSE; 1319566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degree)); 1329566063dSJacob Faibussowitsch if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 1339566063dSJacob Faibussowitsch if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 1340dd791a8SStefano Zampini 135013b3241SStefano Zampini if (sf->multi) sf->multi->multi = NULL; 1369566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf->multi)); 1370dd791a8SStefano Zampini 1389566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sf->map)); 13971438e86SJunchao Zhang 14071438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 1419566063dSJacob Faibussowitsch for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 14271438e86SJunchao Zhang #endif 14371438e86SJunchao Zhang 14495fce210SBarry Smith sf->setupcalled = PETSC_FALSE; 1453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14695fce210SBarry Smith } 14795fce210SBarry Smith 148cc4c1da9SBarry Smith /*@ 149cab54364SBarry Smith PetscSFSetType - Set the `PetscSF` communication implementation 15095fce210SBarry Smith 151c3339decSBarry Smith Collective 15295fce210SBarry Smith 15395fce210SBarry Smith Input Parameters: 154cab54364SBarry Smith + sf - the `PetscSF` context 15595fce210SBarry Smith - type - a known method 156cab54364SBarry Smith .vb 157cab54364SBarry Smith PETSCSFWINDOW - MPI-2/3 one-sided 158cab54364SBarry Smith PETSCSFBASIC - basic implementation using MPI-1 two-sided 159cab54364SBarry Smith .ve 16095fce210SBarry Smith 16195fce210SBarry Smith Options Database Key: 16220662ed9SBarry Smith . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163cab54364SBarry Smith 164cab54364SBarry Smith Level: intermediate 16595fce210SBarry Smith 16695fce210SBarry Smith Notes: 16720662ed9SBarry Smith See `PetscSFType` for possible values 16895fce210SBarry Smith 16920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 17095fce210SBarry Smith @*/ 171d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172d71ae5a4SJacob Faibussowitsch { 17395fce210SBarry Smith PetscBool match; 1745f80ce2aSJacob Faibussowitsch PetscErrorCode (*r)(PetscSF); 17595fce210SBarry Smith 17695fce210SBarry Smith PetscFunctionBegin; 17795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1784f572ea9SToby Isaac PetscAssertPointer(type, 2); 17995fce210SBarry Smith 1809566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 1813ba16761SJacob Faibussowitsch if (match) PetscFunctionReturn(PETSC_SUCCESS); 18295fce210SBarry Smith 1839566063dSJacob Faibussowitsch PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 1846adde796SStefano Zampini PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 18529046d53SLisandro Dalcin /* Destroy the previous PetscSF implementation context */ 186dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Destroy); 1879566063dSJacob Faibussowitsch PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 1889566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 1899566063dSJacob Faibussowitsch PetscCall((*r)(sf)); 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19195fce210SBarry Smith } 19295fce210SBarry Smith 193cc4c1da9SBarry Smith /*@ 194cab54364SBarry Smith PetscSFGetType - Get the `PetscSF` communication implementation 19529046d53SLisandro Dalcin 19629046d53SLisandro Dalcin Not 
Collective 19729046d53SLisandro Dalcin 19829046d53SLisandro Dalcin Input Parameter: 199cab54364SBarry Smith . sf - the `PetscSF` context 20029046d53SLisandro Dalcin 20129046d53SLisandro Dalcin Output Parameter: 202cab54364SBarry Smith . type - the `PetscSF` type name 20329046d53SLisandro Dalcin 20429046d53SLisandro Dalcin Level: intermediate 20529046d53SLisandro Dalcin 20620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 20729046d53SLisandro Dalcin @*/ 208d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209d71ae5a4SJacob Faibussowitsch { 21029046d53SLisandro Dalcin PetscFunctionBegin; 21129046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2124f572ea9SToby Isaac PetscAssertPointer(type, 2); 21329046d53SLisandro Dalcin *type = ((PetscObject)sf)->type_name; 2143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21529046d53SLisandro Dalcin } 21629046d53SLisandro Dalcin 2170764c050SBarry Smith /*@ 21820662ed9SBarry Smith PetscSFDestroy - destroy a star forest 21995fce210SBarry Smith 22095fce210SBarry Smith Collective 22195fce210SBarry Smith 2224165533cSJose E. Roman Input Parameter: 22395fce210SBarry Smith . 
sf - address of star forest 22495fce210SBarry Smith 22595fce210SBarry Smith Level: intermediate 22695fce210SBarry Smith 22720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 22895fce210SBarry Smith @*/ 229d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf) 230d71ae5a4SJacob Faibussowitsch { 23195fce210SBarry Smith PetscFunctionBegin; 2323ba16761SJacob Faibussowitsch if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233f4f49eeaSPierre Jolivet PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234f4f49eeaSPierre Jolivet if (--((PetscObject)*sf)->refct > 0) { 2359371c9d4SSatish Balay *sf = NULL; 2363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2379371c9d4SSatish Balay } 2389566063dSJacob Faibussowitsch PetscCall(PetscSFReset(*sf)); 239f4f49eeaSPierre Jolivet PetscTryTypeMethod(*sf, Destroy); 2409566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 2419566063dSJacob Faibussowitsch if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243715b587bSJunchao Zhang if ((*sf)->use_stream_aware_mpi) { 244715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245715b587bSJunchao Zhang PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246715b587bSJunchao Zhang } 247715b587bSJunchao Zhang #endif 2489566063dSJacob Faibussowitsch PetscCall(PetscHeaderDestroy(sf)); 2493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25095fce210SBarry Smith } 25195fce210SBarry Smith 252d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253d71ae5a4SJacob Faibussowitsch { 254c4e6a40aSLawrence Mitchell PetscInt i, nleaves; 255c4e6a40aSLawrence Mitchell PetscMPIInt size; 256c4e6a40aSLawrence Mitchell const PetscInt *ilocal; 257c4e6a40aSLawrence Mitchell const PetscSFNode *iremote; 258c4e6a40aSLawrence Mitchell 
259c4e6a40aSLawrence Mitchell PetscFunctionBegin; 2603ba16761SJacob Faibussowitsch if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 2619566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 2629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263c4e6a40aSLawrence Mitchell for (i = 0; i < nleaves; i++) { 264c4e6a40aSLawrence Mitchell const PetscInt rank = iremote[i].rank; 265c4e6a40aSLawrence Mitchell const PetscInt remote = iremote[i].index; 266c4e6a40aSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 267c9cc58a2SBarry Smith PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 26808401ef6SPierre Jolivet PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 26908401ef6SPierre Jolivet PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270c4e6a40aSLawrence Mitchell } 2713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 272c4e6a40aSLawrence Mitchell } 273c4e6a40aSLawrence Mitchell 27495fce210SBarry Smith /*@ 27520662ed9SBarry Smith PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 27695fce210SBarry Smith 27795fce210SBarry Smith Collective 27895fce210SBarry Smith 2794165533cSJose E. Roman Input Parameter: 28095fce210SBarry Smith . 
sf - star forest communication object 28195fce210SBarry Smith 28295fce210SBarry Smith Level: beginner 28395fce210SBarry Smith 28420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 28595fce210SBarry Smith @*/ 286d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf) 287d71ae5a4SJacob Faibussowitsch { 28895fce210SBarry Smith PetscFunctionBegin; 28929046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 29029046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 2913ba16761SJacob Faibussowitsch if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 2929566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 2939566063dSJacob Faibussowitsch PetscCall(PetscSFCheckGraphValid_Private(sf)); 2949566063dSJacob Faibussowitsch if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetUp); 29620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 29720c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_CUDA) { 29871438e86SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_CUDA; 29971438e86SJunchao Zhang sf->ops->Free = PetscSFFree_CUDA; 30020c24465SJunchao Zhang } 30120c24465SJunchao Zhang #endif 30259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 30359af0bd3SScott Kruger if (sf->backend == PETSCSF_BACKEND_HIP) { 30459af0bd3SScott Kruger sf->ops->Malloc = PetscSFMalloc_HIP; 30559af0bd3SScott Kruger sf->ops->Free = PetscSFFree_HIP; 30659af0bd3SScott Kruger } 30759af0bd3SScott Kruger #endif 30820c24465SJunchao Zhang 30920c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 31020c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 31120c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Kokkos; 31220c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Kokkos; 31320c24465SJunchao Zhang } 31420c24465SJunchao Zhang #endif 3159566063dSJacob Faibussowitsch 
PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 31695fce210SBarry Smith sf->setupcalled = PETSC_TRUE; 3173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31895fce210SBarry Smith } 31995fce210SBarry Smith 3208af6ec1cSBarry Smith /*@ 321cab54364SBarry Smith PetscSFSetFromOptions - set `PetscSF` options using the options database 32295fce210SBarry Smith 32395fce210SBarry Smith Logically Collective 32495fce210SBarry Smith 3254165533cSJose E. Roman Input Parameter: 32695fce210SBarry Smith . sf - star forest 32795fce210SBarry Smith 32895fce210SBarry Smith Options Database Keys: 32920662ed9SBarry Smith + -sf_type - implementation type, see `PetscSFSetType()` 33051ccb202SJunchao Zhang . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 33120662ed9SBarry Smith . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 33220662ed9SBarry Smith use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 33320662ed9SBarry Smith If true, this option only works with `-use_gpu_aware_mpi 1`. 33420662ed9SBarry Smith . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 33520662ed9SBarry Smith If true, this option only works with `-use_gpu_aware_mpi 1`. 33695fce210SBarry Smith 33738b5cf2dSJacob Faibussowitsch - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 33859af0bd3SScott Kruger On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 33920c24465SJunchao Zhang the only available is kokkos. 
34020c24465SJunchao Zhang 34195fce210SBarry Smith Level: intermediate 342cab54364SBarry Smith 343cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 34495fce210SBarry Smith @*/ 345d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346d71ae5a4SJacob Faibussowitsch { 34795fce210SBarry Smith PetscSFType deft; 34895fce210SBarry Smith char type[256]; 34995fce210SBarry Smith PetscBool flg; 35095fce210SBarry Smith 35195fce210SBarry Smith PetscFunctionBegin; 35295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353d0609cedSBarry Smith PetscObjectOptionsBegin((PetscObject)sf); 35495fce210SBarry Smith deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 3559566063dSJacob Faibussowitsch PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 3569566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, flg ? type : deft)); 3579566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 3587fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 35920c24465SJunchao Zhang { 36020c24465SJunchao Zhang char backendstr[32] = {0}; 36159af0bd3SScott Kruger PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 36220c24465SJunchao Zhang /* Change the defaults set in PetscSFCreate() with command line options */ 363d5b43468SJose E. 
Roman PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 3649566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 3659566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 3669566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 3679566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 3689566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 36959af0bd3SScott Kruger #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 37020c24465SJunchao Zhang if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 37120c24465SJunchao Zhang else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 37259af0bd3SScott Kruger else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 37328b400f6SJacob Faibussowitsch else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 37420c24465SJunchao Zhang #elif defined(PETSC_HAVE_KOKKOS) 37508401ef6SPierre Jolivet PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 37620c24465SJunchao Zhang #endif 377715b587bSJunchao Zhang 378715b587bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 379715b587bSJunchao Zhang if (sf->use_stream_aware_mpi) { 380715b587bSJunchao Zhang MPI_Info info; 381715b587bSJunchao Zhang 382715b587bSJunchao Zhang PetscCallMPI(MPI_Info_create(&info)); 383715b587bSJunchao Zhang PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 384715b587bSJunchao Zhang PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 385715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 386715b587bSJunchao Zhang PetscCallMPI(MPI_Info_free(&info)); 387715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 388715b587bSJunchao Zhang } 389715b587bSJunchao Zhang #endif 39020c24465SJunchao Zhang } 391c2a741eeSJunchao Zhang #endif 392dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 393d0609cedSBarry Smith PetscOptionsEnd(); 3943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39595fce210SBarry Smith } 39695fce210SBarry Smith 39729046d53SLisandro Dalcin /*@ 39895fce210SBarry Smith PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 39995fce210SBarry Smith 40095fce210SBarry Smith Logically Collective 40195fce210SBarry Smith 4024165533cSJose E. 
Roman Input Parameters: 40395fce210SBarry Smith + sf - star forest 404cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 40595fce210SBarry Smith 40695fce210SBarry Smith Level: advanced 40795fce210SBarry Smith 40820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 40995fce210SBarry Smith @*/ 410d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 411d71ae5a4SJacob Faibussowitsch { 41295fce210SBarry Smith PetscFunctionBegin; 41395fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 41495fce210SBarry Smith PetscValidLogicalCollectiveBool(sf, flg, 2); 41528b400f6SJacob Faibussowitsch PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 41695fce210SBarry Smith sf->rankorder = flg; 4173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41895fce210SBarry Smith } 41995fce210SBarry Smith 4205d83a8b1SBarry Smith /*@ 42195fce210SBarry Smith PetscSFSetGraph - Set a parallel star forest 42295fce210SBarry Smith 42395fce210SBarry Smith Collective 42495fce210SBarry Smith 4254165533cSJose E. Roman Input Parameters: 42695fce210SBarry Smith + sf - star forest 42795fce210SBarry Smith . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 42895fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 42920662ed9SBarry Smith . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 430c4e6a40aSLawrence Mitchell during setup in debug mode) 43120662ed9SBarry Smith . localmode - copy mode for `ilocal` 432c4e6a40aSLawrence Mitchell . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 433c4e6a40aSLawrence Mitchell during setup in debug mode) 43420662ed9SBarry Smith - remotemode - copy mode for `iremote` 43595fce210SBarry Smith 43695fce210SBarry Smith Level: intermediate 43795fce210SBarry Smith 43895452b02SPatrick Sanan Notes: 43920662ed9SBarry Smith Leaf indices in `ilocal` must be unique, otherwise an error occurs. 44038ab3f8aSBarry Smith 44120662ed9SBarry Smith Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 44220662ed9SBarry Smith In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 443db2b9530SVaclav Hapla PETSc might modify the respective array; 44420662ed9SBarry Smith if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 445cab54364SBarry Smith Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 446db2b9530SVaclav Hapla 44738b5cf2dSJacob Faibussowitsch Fortran Notes: 44820662ed9SBarry Smith In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 449c4e6a40aSLawrence Mitchell 45038b5cf2dSJacob Faibussowitsch Developer Notes: 451db2b9530SVaclav Hapla We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 45220662ed9SBarry Smith This also allows to compare leaf sets of two `PetscSF`s easily. 
45372bf8598SVaclav Hapla 45420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 45595fce210SBarry Smith @*/ 456d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 457d71ae5a4SJacob Faibussowitsch { 458db2b9530SVaclav Hapla PetscBool unique, contiguous; 45995fce210SBarry Smith 46095fce210SBarry Smith PetscFunctionBegin; 46195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 4624f572ea9SToby Isaac if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 4634f572ea9SToby Isaac if (nleaves > 0) PetscAssertPointer(iremote, 6); 46408401ef6SPierre Jolivet PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 46508401ef6SPierre Jolivet PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 4668da24d32SBarry Smith /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 4678da24d32SBarry Smith * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 4688da24d32SBarry Smith PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 4698da24d32SBarry Smith PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 47029046d53SLisandro Dalcin 4712a67d2daSStefano Zampini if (sf->nroots >= 0) { /* Reset only if graph already set */ 4729566063dSJacob Faibussowitsch PetscCall(PetscSFReset(sf)); 4732a67d2daSStefano Zampini } 4742a67d2daSStefano Zampini 4759566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 
47629046d53SLisandro Dalcin 47795fce210SBarry Smith sf->nroots = nroots; 47895fce210SBarry Smith sf->nleaves = nleaves; 47929046d53SLisandro Dalcin 480db2b9530SVaclav Hapla if (localmode == PETSC_COPY_VALUES && ilocal) { 481db2b9530SVaclav Hapla PetscInt *tlocal = NULL; 482db2b9530SVaclav Hapla 4839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tlocal)); 4849566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 485db2b9530SVaclav Hapla ilocal = tlocal; 486db2b9530SVaclav Hapla } 487db2b9530SVaclav Hapla if (remotemode == PETSC_COPY_VALUES) { 488db2b9530SVaclav Hapla PetscSFNode *tremote = NULL; 489db2b9530SVaclav Hapla 4909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tremote)); 4919566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 492db2b9530SVaclav Hapla iremote = tremote; 493db2b9530SVaclav Hapla } 494db2b9530SVaclav Hapla 49529046d53SLisandro Dalcin if (nleaves && ilocal) { 496db2b9530SVaclav Hapla PetscSFNode work; 497db2b9530SVaclav Hapla 4989566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 4999566063dSJacob Faibussowitsch PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 500db2b9530SVaclav Hapla unique = PetscNot(unique); 501db2b9530SVaclav Hapla PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 502db2b9530SVaclav Hapla sf->minleaf = ilocal[0]; 503db2b9530SVaclav Hapla sf->maxleaf = ilocal[nleaves - 1]; 504db2b9530SVaclav Hapla contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 50529046d53SLisandro Dalcin } else { 50629046d53SLisandro Dalcin sf->minleaf = 0; 50729046d53SLisandro Dalcin sf->maxleaf = nleaves - 1; 508db2b9530SVaclav Hapla unique = PETSC_TRUE; 509db2b9530SVaclav Hapla contiguous = PETSC_TRUE; 51029046d53SLisandro Dalcin } 
51129046d53SLisandro Dalcin 512db2b9530SVaclav Hapla if (contiguous) { 513db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) { 514db2b9530SVaclav Hapla ilocal = NULL; 515db2b9530SVaclav Hapla } else { 5169566063dSJacob Faibussowitsch PetscCall(PetscFree(ilocal)); 517db2b9530SVaclav Hapla } 518db2b9530SVaclav Hapla } 519db2b9530SVaclav Hapla sf->mine = ilocal; 520db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) { 52129046d53SLisandro Dalcin sf->mine_alloc = NULL; 522db2b9530SVaclav Hapla } else { 523db2b9530SVaclav Hapla sf->mine_alloc = ilocal; 52495fce210SBarry Smith } 525db2b9530SVaclav Hapla sf->remote = iremote; 526db2b9530SVaclav Hapla if (remotemode == PETSC_USE_POINTER) { 52729046d53SLisandro Dalcin sf->remote_alloc = NULL; 528db2b9530SVaclav Hapla } else { 529db2b9530SVaclav Hapla sf->remote_alloc = iremote; 53095fce210SBarry Smith } 5319566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 53229046d53SLisandro Dalcin sf->graphset = PETSC_TRUE; 5333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 53495fce210SBarry Smith } 53595fce210SBarry Smith 53629046d53SLisandro Dalcin /*@ 537cab54364SBarry Smith PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 538dd5b3ca6SJunchao Zhang 539dd5b3ca6SJunchao Zhang Collective 540dd5b3ca6SJunchao Zhang 541dd5b3ca6SJunchao Zhang Input Parameters: 542cab54364SBarry Smith + sf - The `PetscSF` 543cab54364SBarry Smith . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 544cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 545cab54364SBarry Smith 546cab54364SBarry Smith Level: intermediate 547dd5b3ca6SJunchao Zhang 548dd5b3ca6SJunchao Zhang Notes: 54920662ed9SBarry Smith It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 
55020662ed9SBarry Smith `n` and `N` are the local and global sizes of `x` respectively. 551dd5b3ca6SJunchao Zhang 55220662ed9SBarry Smith With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 55320662ed9SBarry Smith sequential vectors `y` on all MPI processes. 554dd5b3ca6SJunchao Zhang 55520662ed9SBarry Smith With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 55620662ed9SBarry Smith sequential vector `y` on rank 0. 557dd5b3ca6SJunchao Zhang 55820662ed9SBarry Smith In above cases, entries of `x` are roots and entries of `y` are leaves. 559dd5b3ca6SJunchao Zhang 56020662ed9SBarry Smith With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 561dd5b3ca6SJunchao Zhang creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 562cab54364SBarry Smith of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 563dd5b3ca6SJunchao Zhang not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 564cab54364SBarry Smith items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 565dd5b3ca6SJunchao Zhang 566dd5b3ca6SJunchao Zhang In this case, roots and leaves are symmetric. 
567dd5b3ca6SJunchao Zhang 568cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 569dd5b3ca6SJunchao Zhang @*/ 570d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 571d71ae5a4SJacob Faibussowitsch { 572dd5b3ca6SJunchao Zhang MPI_Comm comm; 573dd5b3ca6SJunchao Zhang PetscInt n, N, res[2]; 574dd5b3ca6SJunchao Zhang PetscMPIInt rank, size; 575dd5b3ca6SJunchao Zhang PetscSFType type; 576dd5b3ca6SJunchao Zhang 577dd5b3ca6SJunchao Zhang PetscFunctionBegin; 5782abc8c78SJacob Faibussowitsch PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 5794f572ea9SToby Isaac if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 5809566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 5812c71b3e2SJacob Faibussowitsch PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 5829566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 584dd5b3ca6SJunchao Zhang 585dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLTOALL) { 586dd5b3ca6SJunchao Zhang type = PETSCSFALLTOALL; 5879566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &sf->map)); 5889566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 5899566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size)); 5909566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(sf->map)); 591dd5b3ca6SJunchao Zhang } else { 5929566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetLocalSize(map, &n)); 5939566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetSize(map, &N)); 594dd5b3ca6SJunchao Zhang res[0] = n; 595dd5b3ca6SJunchao Zhang res[1] = -n; 596dd5b3ca6SJunchao Zhang /* Check if n are same over all ranks so that we 
can optimize it */ 5971c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 598dd5b3ca6SJunchao Zhang if (res[0] == -res[1]) { /* same n */ 599dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 600dd5b3ca6SJunchao Zhang } else { 601dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 602dd5b3ca6SJunchao Zhang } 6039566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(map, &sf->map)); 604dd5b3ca6SJunchao Zhang } 6059566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, type)); 606dd5b3ca6SJunchao Zhang 607dd5b3ca6SJunchao Zhang sf->pattern = pattern; 608dd5b3ca6SJunchao Zhang sf->mine = NULL; /* Contiguous */ 609dd5b3ca6SJunchao Zhang 610dd5b3ca6SJunchao Zhang /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 611dd5b3ca6SJunchao Zhang Also set other easy stuff. 612dd5b3ca6SJunchao Zhang */ 613dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLGATHER) { 614dd5b3ca6SJunchao Zhang sf->nleaves = N; 615dd5b3ca6SJunchao Zhang sf->nroots = n; 616dd5b3ca6SJunchao Zhang sf->nranks = size; 617dd5b3ca6SJunchao Zhang sf->minleaf = 0; 618dd5b3ca6SJunchao Zhang sf->maxleaf = N - 1; 619dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_GATHER) { 620dd5b3ca6SJunchao Zhang sf->nleaves = rank ? 0 : N; 621dd5b3ca6SJunchao Zhang sf->nroots = n; 622dd5b3ca6SJunchao Zhang sf->nranks = rank ? 0 : size; 623dd5b3ca6SJunchao Zhang sf->minleaf = 0; 624dd5b3ca6SJunchao Zhang sf->maxleaf = rank ? 
-1 : N - 1; 625dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 626dd5b3ca6SJunchao Zhang sf->nleaves = size; 627dd5b3ca6SJunchao Zhang sf->nroots = size; 628dd5b3ca6SJunchao Zhang sf->nranks = size; 629dd5b3ca6SJunchao Zhang sf->minleaf = 0; 630dd5b3ca6SJunchao Zhang sf->maxleaf = size - 1; 631dd5b3ca6SJunchao Zhang } 632dd5b3ca6SJunchao Zhang sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 633dd5b3ca6SJunchao Zhang sf->graphset = PETSC_TRUE; 6343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 635dd5b3ca6SJunchao Zhang } 636dd5b3ca6SJunchao Zhang 637dd5b3ca6SJunchao Zhang /*@ 638cab54364SBarry Smith PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 63995fce210SBarry Smith 64095fce210SBarry Smith Collective 64195fce210SBarry Smith 6424165533cSJose E. Roman Input Parameter: 64395fce210SBarry Smith . sf - star forest to invert 64495fce210SBarry Smith 6454165533cSJose E. Roman Output Parameter: 64620662ed9SBarry Smith . isf - inverse of `sf` 6474165533cSJose E. Roman 64895fce210SBarry Smith Level: advanced 64995fce210SBarry Smith 65095fce210SBarry Smith Notes: 65195fce210SBarry Smith All roots must have degree 1. 65295fce210SBarry Smith 65395fce210SBarry Smith The local space may be a permutation, but cannot be sparse. 
65495fce210SBarry Smith 65520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 65695fce210SBarry Smith @*/ 657d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 658d71ae5a4SJacob Faibussowitsch { 65995fce210SBarry Smith PetscMPIInt rank; 66095fce210SBarry Smith PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 66195fce210SBarry Smith const PetscInt *ilocal; 66295fce210SBarry Smith PetscSFNode *roots, *leaves; 66395fce210SBarry Smith 66495fce210SBarry Smith PetscFunctionBegin; 66529046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 66629046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 6674f572ea9SToby Isaac PetscAssertPointer(isf, 2); 66829046d53SLisandro Dalcin 6699566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 67029046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 67129046d53SLisandro Dalcin 6729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 6739566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 674ae9aee6dSMatthew G. 
Knepley for (i = 0; i < maxlocal; i++) { 67595fce210SBarry Smith leaves[i].rank = rank; 67695fce210SBarry Smith leaves[i].index = i; 67795fce210SBarry Smith } 67895fce210SBarry Smith for (i = 0; i < nroots; i++) { 67995fce210SBarry Smith roots[i].rank = -1; 68095fce210SBarry Smith roots[i].index = -1; 68195fce210SBarry Smith } 6829566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 6839566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 68495fce210SBarry Smith 68595fce210SBarry Smith /* Check whether our leaves are sparse */ 6869371c9d4SSatish Balay for (i = 0, count = 0; i < nroots; i++) 6879371c9d4SSatish Balay if (roots[i].rank >= 0) count++; 68895fce210SBarry Smith if (count == nroots) newilocal = NULL; 6899371c9d4SSatish Balay else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 69095fce210SBarry Smith for (i = 0, count = 0; i < nroots; i++) { 69195fce210SBarry Smith if (roots[i].rank >= 0) { 69295fce210SBarry Smith newilocal[count] = i; 69395fce210SBarry Smith roots[count].rank = roots[i].rank; 69495fce210SBarry Smith roots[count].index = roots[i].index; 69595fce210SBarry Smith count++; 69695fce210SBarry Smith } 69795fce210SBarry Smith } 69895fce210SBarry Smith } 69995fce210SBarry Smith 7009566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 7019566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 7029566063dSJacob Faibussowitsch PetscCall(PetscFree2(roots, leaves)); 7033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 70495fce210SBarry Smith } 70595fce210SBarry Smith 70695fce210SBarry Smith /*@ 707cab54364SBarry Smith PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 70895fce210SBarry Smith 
70995fce210SBarry Smith Collective 71095fce210SBarry Smith 7114165533cSJose E. Roman Input Parameters: 71295fce210SBarry Smith + sf - communication object to duplicate 713cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 71495fce210SBarry Smith 7154165533cSJose E. Roman Output Parameter: 71695fce210SBarry Smith . newsf - new communication object 71795fce210SBarry Smith 71895fce210SBarry Smith Level: beginner 71995fce210SBarry Smith 72020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 72195fce210SBarry Smith @*/ 722d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 723d71ae5a4SJacob Faibussowitsch { 72429046d53SLisandro Dalcin PetscSFType type; 72597929ea7SJunchao Zhang MPI_Datatype dtype = MPIU_SCALAR; 72695fce210SBarry Smith 72795fce210SBarry Smith PetscFunctionBegin; 72829046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 72929046d53SLisandro Dalcin PetscValidLogicalCollectiveEnum(sf, opt, 2); 7304f572ea9SToby Isaac PetscAssertPointer(newsf, 3); 7319566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 7329566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type)); 7339566063dSJacob Faibussowitsch if (type) PetscCall(PetscSFSetType(*newsf, type)); 73435cb6cd3SPierre Jolivet (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 73595fce210SBarry Smith if (opt == PETSCSF_DUPLICATE_GRAPH) { 736dd5b3ca6SJunchao Zhang PetscSFCheckGraphSet(sf, 1); 737dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 73895fce210SBarry Smith PetscInt nroots, nleaves; 73995fce210SBarry Smith const PetscInt *ilocal; 74095fce210SBarry Smith const PetscSFNode *iremote; 7419566063dSJacob Faibussowitsch 
PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 7429566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 743dd5b3ca6SJunchao Zhang } else { 7449566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 745dd5b3ca6SJunchao Zhang } 74695fce210SBarry Smith } 74797929ea7SJunchao Zhang /* Since oldtype is committed, so is newtype, according to MPI */ 7489566063dSJacob Faibussowitsch if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 74997929ea7SJunchao Zhang (*newsf)->vscat.bs = sf->vscat.bs; 75097929ea7SJunchao Zhang (*newsf)->vscat.unit = dtype; 75197929ea7SJunchao Zhang (*newsf)->vscat.to_n = sf->vscat.to_n; 75297929ea7SJunchao Zhang (*newsf)->vscat.from_n = sf->vscat.from_n; 75397929ea7SJunchao Zhang /* Do not copy lsf. Build it on demand since it is rarely used */ 75497929ea7SJunchao Zhang 75520c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 75620c24465SJunchao Zhang (*newsf)->backend = sf->backend; 75771438e86SJunchao Zhang (*newsf)->unknown_input_stream = sf->unknown_input_stream; 75820c24465SJunchao Zhang (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 75920c24465SJunchao Zhang (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 76020c24465SJunchao Zhang #endif 761dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 76220c24465SJunchao Zhang /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 7633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 76495fce210SBarry Smith } 76595fce210SBarry Smith 76695fce210SBarry Smith /*@C 76795fce210SBarry Smith PetscSFGetGraph - Get the graph specifying a parallel star forest 76895fce210SBarry Smith 76995fce210SBarry Smith Not Collective 77095fce210SBarry Smith 7714165533cSJose E. Roman Input Parameter: 77295fce210SBarry Smith . 
sf - star forest 77395fce210SBarry Smith 7744165533cSJose E. Roman Output Parameters: 77595fce210SBarry Smith + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 77695fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 77720662ed9SBarry Smith . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 77895fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process 77995fce210SBarry Smith 780cab54364SBarry Smith Level: intermediate 781cab54364SBarry Smith 782373e0d91SLisandro Dalcin Notes: 78320662ed9SBarry Smith We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 784373e0d91SLisandro Dalcin 78520662ed9SBarry Smith The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 786db2b9530SVaclav Hapla 7878dbb0df6SBarry Smith Fortran Notes: 78820662ed9SBarry Smith The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 78920662ed9SBarry Smith want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 
7908dbb0df6SBarry Smith 79120662ed9SBarry Smith To check for a `NULL` `ilocal` use 7928dbb0df6SBarry Smith $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 793ca797d7aSLawrence Mitchell 79420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 79595fce210SBarry Smith @*/ 796d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 797d71ae5a4SJacob Faibussowitsch { 79895fce210SBarry Smith PetscFunctionBegin; 79995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 800b8dee149SJunchao Zhang if (sf->ops->GetGraph) { 801f4f49eeaSPierre Jolivet PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 802b8dee149SJunchao Zhang } else { 80395fce210SBarry Smith if (nroots) *nroots = sf->nroots; 80495fce210SBarry Smith if (nleaves) *nleaves = sf->nleaves; 80595fce210SBarry Smith if (ilocal) *ilocal = sf->mine; 80695fce210SBarry Smith if (iremote) *iremote = sf->remote; 807b8dee149SJunchao Zhang } 8083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 80995fce210SBarry Smith } 81095fce210SBarry Smith 81129046d53SLisandro Dalcin /*@ 81295fce210SBarry Smith PetscSFGetLeafRange - Get the active leaf ranges 81395fce210SBarry Smith 81495fce210SBarry Smith Not Collective 81595fce210SBarry Smith 8164165533cSJose E. Roman Input Parameter: 81795fce210SBarry Smith . sf - star forest 81895fce210SBarry Smith 8194165533cSJose E. Roman Output Parameters: 82020662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 82120662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 
82295fce210SBarry Smith 82395fce210SBarry Smith Level: developer 82495fce210SBarry Smith 82520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 82695fce210SBarry Smith @*/ 827d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 828d71ae5a4SJacob Faibussowitsch { 82995fce210SBarry Smith PetscFunctionBegin; 83095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 83129046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 83295fce210SBarry Smith if (minleaf) *minleaf = sf->minleaf; 83395fce210SBarry Smith if (maxleaf) *maxleaf = sf->maxleaf; 8343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 83595fce210SBarry Smith } 83695fce210SBarry Smith 837ffeef943SBarry Smith /*@ 838cab54364SBarry Smith PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 839fe2efc57SMark 84020f4b53cSBarry Smith Collective 841fe2efc57SMark 842fe2efc57SMark Input Parameters: 843fe2efc57SMark + A - the star forest 844cab54364SBarry Smith . obj - Optional object that provides the prefix for the option names 845736c3998SJose E. 
Roman - name - command line option 846fe2efc57SMark 847fe2efc57SMark Level: intermediate 848cab54364SBarry Smith 84920662ed9SBarry Smith Note: 85020662ed9SBarry Smith See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 85120662ed9SBarry Smith 852db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 853fe2efc57SMark @*/ 854d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 855d71ae5a4SJacob Faibussowitsch { 856fe2efc57SMark PetscFunctionBegin; 857fe2efc57SMark PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 8589566063dSJacob Faibussowitsch PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 8593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 860fe2efc57SMark } 861fe2efc57SMark 862ffeef943SBarry Smith /*@ 86395fce210SBarry Smith PetscSFView - view a star forest 86495fce210SBarry Smith 86595fce210SBarry Smith Collective 86695fce210SBarry Smith 8674165533cSJose E. 
Roman Input Parameters: 86895fce210SBarry Smith + sf - star forest 869cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 87095fce210SBarry Smith 87195fce210SBarry Smith Level: beginner 87295fce210SBarry Smith 873cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 87495fce210SBarry Smith @*/ 875d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 876d71ae5a4SJacob Faibussowitsch { 87795fce210SBarry Smith PetscBool iascii; 87895fce210SBarry Smith PetscViewerFormat format; 87995fce210SBarry Smith 88095fce210SBarry Smith PetscFunctionBegin; 88195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 8829566063dSJacob Faibussowitsch if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 88395fce210SBarry Smith PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 88495fce210SBarry Smith PetscCheckSameComm(sf, 1, viewer, 2); 8859566063dSJacob Faibussowitsch if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 8869566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 88753dd6d7dSJunchao Zhang if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 88895fce210SBarry Smith PetscMPIInt rank; 88981bfa7aaSJed Brown PetscInt ii, i, j; 89095fce210SBarry Smith 8919566063dSJacob Faibussowitsch PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 8929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushTab(viewer)); 893dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 89480153354SVaclav Hapla if (!sf->graphset) { 8959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 8969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 8973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89880153354SVaclav Hapla 
} 8999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 9009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 9019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 90248a46eb9SPierre Jolivet for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 9039566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9049566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 90595fce210SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 90681bfa7aaSJed Brown PetscMPIInt *tmpranks, *perm; 9079566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 9089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 90981bfa7aaSJed Brown for (i = 0; i < sf->nranks; i++) perm[i] = i; 9109566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 9119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 91281bfa7aaSJed Brown for (ii = 0; ii < sf->nranks; ii++) { 91381bfa7aaSJed Brown i = perm[ii]; 9149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 91548a46eb9SPierre Jolivet for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 91695fce210SBarry Smith } 
9179566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmpranks, perm)); 91895fce210SBarry Smith } 9199566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 921dd5b3ca6SJunchao Zhang } 9229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 92395fce210SBarry Smith } 924dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, View, viewer); 9253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 92695fce210SBarry Smith } 92795fce210SBarry Smith 92895fce210SBarry Smith /*@C 929dec1416fSJunchao Zhang PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 93095fce210SBarry Smith 93195fce210SBarry Smith Not Collective 93295fce210SBarry Smith 9334165533cSJose E. Roman Input Parameter: 93495fce210SBarry Smith . sf - star forest 93595fce210SBarry Smith 9364165533cSJose E. Roman Output Parameters: 93795fce210SBarry Smith + nranks - number of ranks referenced by local part 93820662ed9SBarry Smith . ranks - [`nranks`] array of ranks 93920662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 94020662ed9SBarry Smith . 
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank 94120662ed9SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank 94295fce210SBarry Smith 94395fce210SBarry Smith Level: developer 94495fce210SBarry Smith 945cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 94695fce210SBarry Smith @*/ 947d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 948d71ae5a4SJacob Faibussowitsch { 94995fce210SBarry Smith PetscFunctionBegin; 95095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 95128b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 952dec1416fSJunchao Zhang if (sf->ops->GetRootRanks) { 9539927e4dfSBarry Smith PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 954dec1416fSJunchao Zhang } else { 955dec1416fSJunchao Zhang /* The generic implementation */ 95695fce210SBarry Smith if (nranks) *nranks = sf->nranks; 95795fce210SBarry Smith if (ranks) *ranks = sf->ranks; 95895fce210SBarry Smith if (roffset) *roffset = sf->roffset; 95995fce210SBarry Smith if (rmine) *rmine = sf->rmine; 96095fce210SBarry Smith if (rremote) *rremote = sf->rremote; 961dec1416fSJunchao Zhang } 9623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 96395fce210SBarry Smith } 96495fce210SBarry Smith 9658750ddebSJunchao Zhang /*@C 9668750ddebSJunchao Zhang PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 9678750ddebSJunchao Zhang 9688750ddebSJunchao Zhang Not Collective 9698750ddebSJunchao Zhang 9704165533cSJose E. Roman Input Parameter: 9718750ddebSJunchao Zhang . sf - star forest 9728750ddebSJunchao Zhang 9734165533cSJose E. 
Output Parameters:
+ niranks  - number of leaf ranks referencing roots on this process
. iranks   - [`niranks`] array of ranks
. ioffset  - [`niranks`+1] offset in `irootloc` for each rank
- irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  /* Delegate to the implementation when it provides GetLeafRanks; otherwise fail and name the unsupported type */
  if (sf->ops->GetLeafRanks) {
    PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
  } else {
    PetscSFType type;
    PetscCall(PetscSFGetType(sf, &type));
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Linear search: returns PETSC_TRUE if needle equals one of the first n entries of list, PETSC_FALSE otherwise */
static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
{
  PetscInt i;
  for (i = 0; i < n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
  PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

  Collective

  Input Parameters:
+ sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks;
  PetscInt     *rcount, *ranks;
  PetscInt      i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  /* NOTE(review): size is queried here but not read anywhere else in this function */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Step 1: count how many leaves reference each remote rank, using a hash map keyed by (rank + 1) */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  /* The number of distinct keys is the number of distinct remote ranks */
  PetscCall(PetscHMapIGetSize(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Step 2: copy the (rank, leaf count) pairs out of the hash map into the work arrays ranks[] and rcount[] */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranks[i]);
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* Step 3: translate the ranks 0..groupsize-1 of dgroup into ranks of the group of sf's communicator (groupranks[]) */
  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Step 4: two-cursor in-place partition; sf->ndranks advances from the front and i retreats from the back,
     swapping (rank, count) pairs until the two cursors meet */
  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Step 5: sort each partition separately by rank, carrying the matching counts along */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  /* NOTE(review): the rcount guard presumably avoids offsetting a NULL pointer when no ranks were allocated -- confirm */
  if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Step 6: store the ranks as PetscMPIInt, build roffset[] as the running sum of the per-rank leaf counts,
     and zero rcount[] so it can be reused below as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  /* Step 7: place every leaf into the slot range of its remote rank in rmine[] (local leaf index) and rremote[] (remote root index) */
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    /* The lookup is skipped while consecutive leaves reference the same rank as the previous leaf (orank) */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      /* Look in the distinguished part first, then in the non-distinguished part (shifting the hit by ndranks) */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    /* When sf->mine is NULL the leaf's local index is its position i */
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

Roman Output Parameters: 112195fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots) 112295fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference) 112395fce210SBarry Smith 112495fce210SBarry Smith Level: developer 112595fce210SBarry Smith 1126cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 112795fce210SBarry Smith @*/ 1128d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1129d71ae5a4SJacob Faibussowitsch { 11307fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 113195fce210SBarry Smith 113295fce210SBarry Smith PetscFunctionBegin; 113308401ef6SPierre Jolivet PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 113495fce210SBarry Smith if (sf->ingroup == MPI_GROUP_NULL) { 113595fce210SBarry Smith PetscInt i; 113695fce210SBarry Smith const PetscInt *indegree; 113795fce210SBarry Smith PetscMPIInt rank, *outranks, *inranks; 113895fce210SBarry Smith PetscSFNode *remote; 113995fce210SBarry Smith PetscSF bgcount; 114095fce210SBarry Smith 114195fce210SBarry Smith /* Compute the number of incoming ranks */ 11429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &remote)); 114395fce210SBarry Smith for (i = 0; i < sf->nranks; i++) { 114495fce210SBarry Smith remote[i].rank = sf->ranks[i]; 114595fce210SBarry Smith remote[i].index = 0; 114695fce210SBarry Smith } 11479566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 11489566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 11499566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 11509566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 
115195fce210SBarry Smith /* Enumerate the incoming ranks */ 11529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 11539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 115495fce210SBarry Smith for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 11559566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 11569566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 11579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 11589566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 11599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 11609566063dSJacob Faibussowitsch PetscCall(PetscFree2(inranks, outranks)); 11619566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&bgcount)); 116295fce210SBarry Smith } 116395fce210SBarry Smith *incoming = sf->ingroup; 116495fce210SBarry Smith 116595fce210SBarry Smith if (sf->outgroup == MPI_GROUP_NULL) { 11669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 11679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 11689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 116995fce210SBarry Smith } 117095fce210SBarry Smith *outgoing = sf->outgroup; 11713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 117295fce210SBarry Smith } 117395fce210SBarry Smith 117429046d53SLisandro Dalcin /*@ 11750dd791a8SStefano Zampini PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 11760dd791a8SStefano Zampini 11770dd791a8SStefano Zampini Collective 11780dd791a8SStefano Zampini 11790dd791a8SStefano Zampini Input Parameter: 11800dd791a8SStefano Zampini . 
sf - star forest 11810dd791a8SStefano Zampini 11820dd791a8SStefano Zampini Output Parameter: 11830dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications 11840dd791a8SStefano Zampini 11850dd791a8SStefano Zampini Level: developer 11860dd791a8SStefano Zampini 11870dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 11880dd791a8SStefano Zampini @*/ 11890dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 11900dd791a8SStefano Zampini { 11910dd791a8SStefano Zampini PetscFunctionBegin; 11920dd791a8SStefano Zampini PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 11930dd791a8SStefano Zampini PetscAssertPointer(rsf, 2); 11940dd791a8SStefano Zampini if (!sf->rankssf) { 11950dd791a8SStefano Zampini PetscSFNode *rremotes; 11960dd791a8SStefano Zampini const PetscMPIInt *ranks; 11970dd791a8SStefano Zampini PetscInt nranks; 11980dd791a8SStefano Zampini 11990dd791a8SStefano Zampini PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 12000dd791a8SStefano Zampini PetscCall(PetscMalloc1(nranks, &rremotes)); 12010dd791a8SStefano Zampini for (PetscInt i = 0; i < nranks; i++) { 12020dd791a8SStefano Zampini rremotes[i].rank = ranks[i]; 12030dd791a8SStefano Zampini rremotes[i].index = 0; 12040dd791a8SStefano Zampini } 12050dd791a8SStefano Zampini PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 12060dd791a8SStefano Zampini PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 12070dd791a8SStefano Zampini } 12080dd791a8SStefano Zampini *rsf = sf->rankssf; 12090dd791a8SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 12100dd791a8SStefano Zampini } 12110dd791a8SStefano Zampini 12120dd791a8SStefano Zampini /*@ 1213cab54364SBarry Smith PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 121495fce210SBarry Smith 121595fce210SBarry Smith Collective 

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1

  Level: developer

  Note:
  In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  /* Without a graph, return a rank-only duplicate whose own multi points to itself */
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* The multi-SF is built on first request and kept in sf->multi */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    /* inoffset[] is the running sum of root degrees: root i owns the multi-root slots [inoffset[i], inoffset[i+1]) */
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Every leaf adds 1 to its root's entry of inoffset[]; presumably outoffset[] receives the value seen before the
       leaf's own update, giving each leaf a distinct slot -- confirm against the PetscSFFetchAndOpBegin() documentation */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    /* Each leaf keeps its remote rank but now points at the slot index it fetched */
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    /* The multi-SF has inoffset[nroots] roots (the sum of all root degrees) and the same leaves as sf */
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    /* Optional second pass: renumber the slots of each root so that they are ordered by the rank of the contributing leaf */
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Send this process's rank along every edge so each multi-root slot records which rank filled it */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        /* tmpoffset[] starts as the identity and is permuted together with the ranks of root i */
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        /* Old slot inoffset[i] + tmpoffset[j] moves to new slot inoffset[i] + j */
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Send each leaf the new slot index of the slot it currently points at */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      /* Replace the graph of the multi-SF with the rank-ordered numbering */
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

  Collective

  Input Parameters:
+ sf        - original star forest
. nselected - number of selected roots on this process
- selected  - indices of the selected roots on this process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling `PetscSFGetGraph()`.
131995fce210SBarry Smith 1320cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 132195fce210SBarry Smith @*/ 1322d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1323d71ae5a4SJacob Faibussowitsch { 1324cd620004SJunchao Zhang PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1325cd620004SJunchao Zhang const PetscInt *ilocal; 1326cd620004SJunchao Zhang signed char *rootdata, *leafdata, *leafmem; 1327ba2a7774SJunchao Zhang const PetscSFNode *iremote; 1328f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1329f659e5c7SJunchao Zhang MPI_Comm comm; 133095fce210SBarry Smith 133195fce210SBarry Smith PetscFunctionBegin; 133295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 133329046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 13344f572ea9SToby Isaac if (nselected) PetscAssertPointer(selected, 3); 13354f572ea9SToby Isaac PetscAssertPointer(esf, 4); 13360511a646SMatthew G. 
Knepley 13379566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 13389566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 13399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 13409566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1341cd620004SJunchao Zhang 134276bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1343cd620004SJunchao Zhang PetscBool dups; 13449566063dSJacob Faibussowitsch PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 134528b400f6SJacob Faibussowitsch PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1346511e6246SStefano Zampini for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1347cd620004SJunchao Zhang } 1348f659e5c7SJunchao Zhang 1349dbbe0bcdSBarry Smith if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1350dbbe0bcdSBarry Smith else { 1351cd620004SJunchao Zhang /* A generic version of creating embedded sf */ 13529566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1353cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 13549566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 13558e3a54c0SPierre Jolivet leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1356cd620004SJunchao Zhang /* Tag selected roots and bcast to leaves */ 1357cd620004SJunchao Zhang for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 13589566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 13599566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, 
MPI_REPLACE)); 1360ba2a7774SJunchao Zhang 1361cd620004SJunchao Zhang /* Build esf with leaves that are still connected */ 1362cd620004SJunchao Zhang esf_nleaves = 0; 1363cd620004SJunchao Zhang for (i = 0; i < nleaves; i++) { 1364cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1365cd620004SJunchao Zhang /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1366cd620004SJunchao Zhang with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1367cd620004SJunchao Zhang */ 1368cd620004SJunchao Zhang esf_nleaves += (leafdata[j] ? 1 : 0); 1369cd620004SJunchao Zhang } 13709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 13719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1372cd620004SJunchao Zhang for (i = n = 0; i < nleaves; i++) { 1373cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1374cd620004SJunchao Zhang if (leafdata[j]) { 1375cd620004SJunchao Zhang new_ilocal[n] = j; 1376cd620004SJunchao Zhang new_iremote[n].rank = iremote[i].rank; 1377cd620004SJunchao Zhang new_iremote[n].index = iremote[i].index; 1378fc1ede2bSMatthew G. Knepley ++n; 137995fce210SBarry Smith } 138095fce210SBarry Smith } 13819566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, esf)); 13829566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*esf)); 13839566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 13849566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem)); 1385f659e5c7SJunchao Zhang } 13869566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 13873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 138895fce210SBarry Smith } 138995fce210SBarry Smith 13902f5fb4c2SMatthew G. 
Knepley /*@C 139120662ed9SBarry Smith PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 13922f5fb4c2SMatthew G. Knepley 13932f5fb4c2SMatthew G. Knepley Collective 13942f5fb4c2SMatthew G. Knepley 13954165533cSJose E. Roman Input Parameters: 13962f5fb4c2SMatthew G. Knepley + sf - original star forest 1397f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process 1398f659e5c7SJunchao Zhang - selected - indices of the selected leaves on this process 13992f5fb4c2SMatthew G. Knepley 14004165533cSJose E. Roman Output Parameter: 14012f5fb4c2SMatthew G. Knepley . newsf - new star forest 14022f5fb4c2SMatthew G. Knepley 14032f5fb4c2SMatthew G. Knepley Level: advanced 14042f5fb4c2SMatthew G. Knepley 1405cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 14062f5fb4c2SMatthew G. Knepley @*/ 1407d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1408d71ae5a4SJacob Faibussowitsch { 1409f659e5c7SJunchao Zhang const PetscSFNode *iremote; 1410f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1411f659e5c7SJunchao Zhang const PetscInt *ilocal; 1412f659e5c7SJunchao Zhang PetscInt i, nroots, *leaves, *new_ilocal; 1413f659e5c7SJunchao Zhang MPI_Comm comm; 14142f5fb4c2SMatthew G. Knepley 14152f5fb4c2SMatthew G. Knepley PetscFunctionBegin; 14162f5fb4c2SMatthew G. Knepley PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 141729046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 14184f572ea9SToby Isaac if (nselected) PetscAssertPointer(selected, 3); 14194f572ea9SToby Isaac PetscAssertPointer(newsf, 4); 14202f5fb4c2SMatthew G. 
Knepley 1421f659e5c7SJunchao Zhang /* Uniq selected[] and put results in leaves[] */ 14229566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 14239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &leaves)); 14249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(leaves, selected, nselected)); 14259566063dSJacob Faibussowitsch PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 142608401ef6SPierre Jolivet PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1427f659e5c7SJunchao Zhang 1428f659e5c7SJunchao Zhang /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1429dbbe0bcdSBarry Smith if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1430dbbe0bcdSBarry Smith else { 14319566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 14329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_ilocal)); 14339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_iremote)); 1434f659e5c7SJunchao Zhang for (i = 0; i < nselected; ++i) { 1435f659e5c7SJunchao Zhang const PetscInt l = leaves[i]; 1436f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 1437f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 1438f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 14392f5fb4c2SMatthew G. 
Knepley } 14409566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 14419566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1442f659e5c7SJunchao Zhang } 14439566063dSJacob Faibussowitsch PetscCall(PetscFree(leaves)); 14443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14452f5fb4c2SMatthew G. Knepley } 14462f5fb4c2SMatthew G. Knepley 144795fce210SBarry Smith /*@C 1448cab54364SBarry Smith PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 14493482bfa8SJunchao Zhang 1450c3339decSBarry Smith Collective 14513482bfa8SJunchao Zhang 14524165533cSJose E. Roman Input Parameters: 14533482bfa8SJunchao Zhang + sf - star forest on which to communicate 14543482bfa8SJunchao Zhang . unit - data type associated with each node 14553482bfa8SJunchao Zhang . rootdata - buffer to broadcast 14563482bfa8SJunchao Zhang - op - operation to use for reduction 14573482bfa8SJunchao Zhang 14584165533cSJose E. Roman Output Parameter: 14593482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 14603482bfa8SJunchao Zhang 14613482bfa8SJunchao Zhang Level: intermediate 14623482bfa8SJunchao Zhang 146320662ed9SBarry Smith Note: 146420662ed9SBarry Smith When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1465da81f932SPierre Jolivet are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1466cab54364SBarry Smith use `PetscSFBcastWithMemTypeBegin()` instead. 
1467cab54364SBarry Smith 1468cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 14693482bfa8SJunchao Zhang @*/ 1470d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1471d71ae5a4SJacob Faibussowitsch { 1472eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 14733482bfa8SJunchao Zhang 14743482bfa8SJunchao Zhang PetscFunctionBegin; 14753482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 14769566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 14779566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 14789566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 14799566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1480dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 14819566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 14823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14833482bfa8SJunchao Zhang } 14843482bfa8SJunchao Zhang 14853482bfa8SJunchao Zhang /*@C 148620662ed9SBarry Smith PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 148720662ed9SBarry Smith to `PetscSFBcastEnd()` 1488d0295fc0SJunchao Zhang 1489c3339decSBarry Smith Collective 1490d0295fc0SJunchao Zhang 14914165533cSJose E. Roman Input Parameters: 1492d0295fc0SJunchao Zhang + sf - star forest on which to communicate 1493d0295fc0SJunchao Zhang . unit - data type associated with each node 1494d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1495d0295fc0SJunchao Zhang . rootdata - buffer to broadcast 1496d0295fc0SJunchao Zhang . 
leafmtype - memory type of leafdata 1497d0295fc0SJunchao Zhang - op - operation to use for reduction 1498d0295fc0SJunchao Zhang 14994165533cSJose E. Roman Output Parameter: 1500d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 1501d0295fc0SJunchao Zhang 1502d0295fc0SJunchao Zhang Level: intermediate 1503d0295fc0SJunchao Zhang 1504cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1505d0295fc0SJunchao Zhang @*/ 1506d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1507d71ae5a4SJacob Faibussowitsch { 1508d0295fc0SJunchao Zhang PetscFunctionBegin; 1509d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15109566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 15119566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1512dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 15139566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 15143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1515d0295fc0SJunchao Zhang } 1516d0295fc0SJunchao Zhang 1517d0295fc0SJunchao Zhang /*@C 151820662ed9SBarry Smith PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 15193482bfa8SJunchao Zhang 15203482bfa8SJunchao Zhang Collective 15213482bfa8SJunchao Zhang 15224165533cSJose E. Roman Input Parameters: 15233482bfa8SJunchao Zhang + sf - star forest 15243482bfa8SJunchao Zhang . unit - data type 15253482bfa8SJunchao Zhang . rootdata - buffer to broadcast 15263482bfa8SJunchao Zhang - op - operation to use for reduction 15273482bfa8SJunchao Zhang 15284165533cSJose E. 
Roman Output Parameter: 15293482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 15303482bfa8SJunchao Zhang 15313482bfa8SJunchao Zhang Level: intermediate 15323482bfa8SJunchao Zhang 1533cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 15343482bfa8SJunchao Zhang @*/ 1535d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1536d71ae5a4SJacob Faibussowitsch { 15373482bfa8SJunchao Zhang PetscFunctionBegin; 15383482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15399566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1540dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 15419566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 15423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15433482bfa8SJunchao Zhang } 15443482bfa8SJunchao Zhang 15453482bfa8SJunchao Zhang /*@C 1546cab54364SBarry Smith PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 154795fce210SBarry Smith 154895fce210SBarry Smith Collective 154995fce210SBarry Smith 15504165533cSJose E. Roman Input Parameters: 155195fce210SBarry Smith + sf - star forest 155295fce210SBarry Smith . unit - data type 155395fce210SBarry Smith . leafdata - values to reduce 155495fce210SBarry Smith - op - reduction operation 155595fce210SBarry Smith 15564165533cSJose E. Roman Output Parameter: 155795fce210SBarry Smith . 
rootdata - result of reduction of values from all leaves of each root 155895fce210SBarry Smith 155995fce210SBarry Smith Level: intermediate 156095fce210SBarry Smith 156120662ed9SBarry Smith Note: 156220662ed9SBarry Smith When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1563da81f932SPierre Jolivet are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1564cab54364SBarry Smith use `PetscSFReduceWithMemTypeBegin()` instead. 1565d0295fc0SJunchao Zhang 156620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 156795fce210SBarry Smith @*/ 1568d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1569d71ae5a4SJacob Faibussowitsch { 1570eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 157195fce210SBarry Smith 157295fce210SBarry Smith PetscFunctionBegin; 157395fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15749566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 15759566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 15769566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 15779566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1578f4f49eeaSPierre Jolivet PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 15799566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 15803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 158195fce210SBarry Smith } 158295fce210SBarry Smith 158395fce210SBarry Smith /*@C 1584cab54364SBarry Smith PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, 
to be completed with call to `PetscSFReduceEnd()` 1585d0295fc0SJunchao Zhang 1586d0295fc0SJunchao Zhang Collective 1587d0295fc0SJunchao Zhang 15884165533cSJose E. Roman Input Parameters: 1589d0295fc0SJunchao Zhang + sf - star forest 1590d0295fc0SJunchao Zhang . unit - data type 1591d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1592d0295fc0SJunchao Zhang . leafdata - values to reduce 1593d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1594d0295fc0SJunchao Zhang - op - reduction operation 1595d0295fc0SJunchao Zhang 15964165533cSJose E. Roman Output Parameter: 1597d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root 1598d0295fc0SJunchao Zhang 1599d0295fc0SJunchao Zhang Level: intermediate 1600d0295fc0SJunchao Zhang 160120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1602d0295fc0SJunchao Zhang @*/ 1603d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1604d71ae5a4SJacob Faibussowitsch { 1605d0295fc0SJunchao Zhang PetscFunctionBegin; 1606d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16079566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 16089566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1609f4f49eeaSPierre Jolivet PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 16109566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 16113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1612d0295fc0SJunchao Zhang } 1613d0295fc0SJunchao Zhang 1614d0295fc0SJunchao Zhang /*@C 161520662ed9SBarry Smith PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or 
`PetscSFReduceWithMemTypeBegin()` 161695fce210SBarry Smith 161795fce210SBarry Smith Collective 161895fce210SBarry Smith 16194165533cSJose E. Roman Input Parameters: 162095fce210SBarry Smith + sf - star forest 162195fce210SBarry Smith . unit - data type 162295fce210SBarry Smith . leafdata - values to reduce 162395fce210SBarry Smith - op - reduction operation 162495fce210SBarry Smith 16254165533cSJose E. Roman Output Parameter: 162695fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 162795fce210SBarry Smith 162895fce210SBarry Smith Level: intermediate 162995fce210SBarry Smith 163020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 163195fce210SBarry Smith @*/ 1632d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1633d71ae5a4SJacob Faibussowitsch { 163495fce210SBarry Smith PetscFunctionBegin; 163595fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16369566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1637dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 16389566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 16393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 164095fce210SBarry Smith } 164195fce210SBarry Smith 164295fce210SBarry Smith /*@C 1643cab54364SBarry Smith PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1644cab54364SBarry Smith to be completed with `PetscSFFetchAndOpEnd()` 1645a1729e3fSJunchao Zhang 1646a1729e3fSJunchao Zhang Collective 1647a1729e3fSJunchao Zhang 16484165533cSJose E. 
Roman Input Parameters: 1649a1729e3fSJunchao Zhang + sf - star forest 1650a1729e3fSJunchao Zhang . unit - data type 1651a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1652a1729e3fSJunchao Zhang - op - operation to use for reduction 1653a1729e3fSJunchao Zhang 16544165533cSJose E. Roman Output Parameters: 1655a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1656a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1657a1729e3fSJunchao Zhang 1658a1729e3fSJunchao Zhang Level: advanced 1659a1729e3fSJunchao Zhang 1660a1729e3fSJunchao Zhang Note: 1661a1729e3fSJunchao Zhang The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1662a1729e3fSJunchao Zhang might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1663a1729e3fSJunchao Zhang not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1664a1729e3fSJunchao Zhang integers. 
1665a1729e3fSJunchao Zhang 1666cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1667a1729e3fSJunchao Zhang @*/ 1668d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1669d71ae5a4SJacob Faibussowitsch { 1670eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype, leafupdatemtype; 1671a1729e3fSJunchao Zhang 1672a1729e3fSJunchao Zhang PetscFunctionBegin; 1673a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16749566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 16759566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 16769566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 16779566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 16789566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 167908401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1680dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 16819566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 16823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1683a1729e3fSJunchao Zhang } 1684a1729e3fSJunchao Zhang 1685a1729e3fSJunchao Zhang /*@C 1686cab54364SBarry Smith PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1687cab54364SBarry Smith applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1688d3b3e55cSJunchao Zhang 1689d3b3e55cSJunchao Zhang Collective 1690d3b3e55cSJunchao Zhang 1691d3b3e55cSJunchao Zhang Input 
Parameters: 1692d3b3e55cSJunchao Zhang + sf - star forest 1693d3b3e55cSJunchao Zhang . unit - data type 1694d3b3e55cSJunchao Zhang . rootmtype - memory type of rootdata 1695d3b3e55cSJunchao Zhang . leafmtype - memory type of leafdata 1696d3b3e55cSJunchao Zhang . leafdata - leaf values to use in reduction 1697d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate 1698d3b3e55cSJunchao Zhang - op - operation to use for reduction 1699d3b3e55cSJunchao Zhang 1700d3b3e55cSJunchao Zhang Output Parameters: 1701d3b3e55cSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1702d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1703d3b3e55cSJunchao Zhang 1704d3b3e55cSJunchao Zhang Level: advanced 1705d3b3e55cSJunchao Zhang 1706cab54364SBarry Smith Note: 1707cab54364SBarry Smith See `PetscSFFetchAndOpBegin()` for more details. 1708d3b3e55cSJunchao Zhang 170920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1710d3b3e55cSJunchao Zhang @*/ 1711d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1712d71ae5a4SJacob Faibussowitsch { 1713d3b3e55cSJunchao Zhang PetscFunctionBegin; 1714d3b3e55cSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 17159566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 17169566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 171708401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1718dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, 
FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 17199566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 17203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1721d3b3e55cSJunchao Zhang } 1722d3b3e55cSJunchao Zhang 1723d3b3e55cSJunchao Zhang /*@C 172420662ed9SBarry Smith PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 172520662ed9SBarry Smith to fetch values from roots and update atomically by applying operation using my leaf value 1726a1729e3fSJunchao Zhang 1727a1729e3fSJunchao Zhang Collective 1728a1729e3fSJunchao Zhang 17294165533cSJose E. Roman Input Parameters: 1730a1729e3fSJunchao Zhang + sf - star forest 1731a1729e3fSJunchao Zhang . unit - data type 1732a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1733a1729e3fSJunchao Zhang - op - operation to use for reduction 1734a1729e3fSJunchao Zhang 17354165533cSJose E. 
Roman Output Parameters: 1736a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1737a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1738a1729e3fSJunchao Zhang 1739a1729e3fSJunchao Zhang Level: advanced 1740a1729e3fSJunchao Zhang 174120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1742a1729e3fSJunchao Zhang @*/ 1743d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1744d71ae5a4SJacob Faibussowitsch { 1745a1729e3fSJunchao Zhang PetscFunctionBegin; 1746a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 17479566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1748dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 17499566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 17503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1751a1729e3fSJunchao Zhang } 1752a1729e3fSJunchao Zhang 1753a1729e3fSJunchao Zhang /*@C 1754cab54364SBarry Smith PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 175595fce210SBarry Smith 175695fce210SBarry Smith Collective 175795fce210SBarry Smith 17584165533cSJose E. Roman Input Parameter: 175995fce210SBarry Smith . sf - star forest 176095fce210SBarry Smith 17614165533cSJose E. Roman Output Parameter: 176295fce210SBarry Smith . 
degree - degree of each root vertex 176395fce210SBarry Smith 176495fce210SBarry Smith Level: advanced 176595fce210SBarry Smith 1766cab54364SBarry Smith Note: 176720662ed9SBarry Smith The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1768ffe67aa5SVáclav Hapla 1769cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 177095fce210SBarry Smith @*/ 1771d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) 1772d71ae5a4SJacob Faibussowitsch { 177395fce210SBarry Smith PetscFunctionBegin; 177495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 177595fce210SBarry Smith PetscSFCheckGraphSet(sf, 1); 17764f572ea9SToby Isaac PetscAssertPointer(degree, 2); 1777803bd9e8SMatthew G. Knepley if (!sf->degreeknown) { 17785b0d146aSStefano Zampini PetscInt i, nroots = sf->nroots, maxlocal; 177928b400f6SJacob Faibussowitsch PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 17805b0d146aSStefano Zampini maxlocal = sf->maxleaf - sf->minleaf + 1; 17819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &sf->degree)); 17829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 178329046d53SLisandro Dalcin for (i = 0; i < nroots; i++) sf->degree[i] = 0; 17849837ea96SMatthew G. 
Knepley for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 17859566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 178695fce210SBarry Smith } 178795fce210SBarry Smith *degree = NULL; 17883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 178995fce210SBarry Smith } 179095fce210SBarry Smith 179195fce210SBarry Smith /*@C 1792cab54364SBarry Smith PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 179395fce210SBarry Smith 179495fce210SBarry Smith Collective 179595fce210SBarry Smith 17964165533cSJose E. Roman Input Parameter: 179795fce210SBarry Smith . sf - star forest 179895fce210SBarry Smith 17994165533cSJose E. Roman Output Parameter: 180095fce210SBarry Smith . degree - degree of each root vertex 180195fce210SBarry Smith 180295fce210SBarry Smith Level: developer 180395fce210SBarry Smith 1804cab54364SBarry Smith Note: 180520662ed9SBarry Smith The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 
1806ffe67aa5SVáclav Hapla 1807cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 180895fce210SBarry Smith @*/ 1809d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1810d71ae5a4SJacob Faibussowitsch { 181195fce210SBarry Smith PetscFunctionBegin; 181295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 181395fce210SBarry Smith PetscSFCheckGraphSet(sf, 1); 18144f572ea9SToby Isaac PetscAssertPointer(degree, 2); 181595fce210SBarry Smith if (!sf->degreeknown) { 181628b400f6SJacob Faibussowitsch PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 18179566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 18189566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degreetmp)); 181995fce210SBarry Smith sf->degreeknown = PETSC_TRUE; 182095fce210SBarry Smith } 182195fce210SBarry Smith *degree = sf->degree; 18223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 182395fce210SBarry Smith } 182495fce210SBarry Smith 1825673100f5SVaclav Hapla /*@C 182620662ed9SBarry Smith PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 182766dfcd1aSVaclav Hapla Each multi-root is assigned index of the corresponding original root. 1828673100f5SVaclav Hapla 1829673100f5SVaclav Hapla Collective 1830673100f5SVaclav Hapla 18314165533cSJose E. Roman Input Parameters: 1832673100f5SVaclav Hapla + sf - star forest 1833cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1834673100f5SVaclav Hapla 18354165533cSJose E. 
Roman Output Parameters: 183620662ed9SBarry Smith + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 183720662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1838673100f5SVaclav Hapla 1839673100f5SVaclav Hapla Level: developer 1840673100f5SVaclav Hapla 1841cab54364SBarry Smith Note: 184220662ed9SBarry Smith The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1843ffe67aa5SVáclav Hapla 1844cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1845673100f5SVaclav Hapla @*/ 1846d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1847d71ae5a4SJacob Faibussowitsch { 1848673100f5SVaclav Hapla PetscSF msf; 1849673100f5SVaclav Hapla PetscInt i, j, k, nroots, nmroots; 1850673100f5SVaclav Hapla 1851673100f5SVaclav Hapla PetscFunctionBegin; 1852673100f5SVaclav Hapla PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 18539566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 18544f572ea9SToby Isaac if (nroots) PetscAssertPointer(degree, 2); 18554f572ea9SToby Isaac if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 18564f572ea9SToby Isaac PetscAssertPointer(multiRootsOrigNumbering, 4); 18579566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &msf)); 18589566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 18599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1860673100f5SVaclav Hapla for (i = 0, j = 0, k = 0; i < nroots; i++) { 1861673100f5SVaclav Hapla if (!degree[i]) continue; 1862ad540459SPierre Jolivet for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1863673100f5SVaclav 
Hapla } 186408401ef6SPierre Jolivet PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 186566dfcd1aSVaclav Hapla if (nMultiRoots) *nMultiRoots = nmroots; 18663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1867673100f5SVaclav Hapla } 1868673100f5SVaclav Hapla 186995fce210SBarry Smith /*@C 1870cab54364SBarry Smith PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 187195fce210SBarry Smith 187295fce210SBarry Smith Collective 187395fce210SBarry Smith 18744165533cSJose E. Roman Input Parameters: 187595fce210SBarry Smith + sf - star forest 187695fce210SBarry Smith . unit - data type 187795fce210SBarry Smith - leafdata - leaf data to gather to roots 187895fce210SBarry Smith 18794165533cSJose E. Roman Output Parameter: 188095fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 188195fce210SBarry Smith 188295fce210SBarry Smith Level: intermediate 188395fce210SBarry Smith 1884cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 188595fce210SBarry Smith @*/ 1886d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1887d71ae5a4SJacob Faibussowitsch { 1888a5526d50SJunchao Zhang PetscSF multi = NULL; 188995fce210SBarry Smith 189095fce210SBarry Smith PetscFunctionBegin; 189195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 18929566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 18939566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 18949566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 18953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 189695fce210SBarry Smith } 189795fce210SBarry Smith 189895fce210SBarry Smith /*@C 1899cab54364SBarry Smith 
PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 190095fce210SBarry Smith 190195fce210SBarry Smith Collective 190295fce210SBarry Smith 19034165533cSJose E. Roman Input Parameters: 190495fce210SBarry Smith + sf - star forest 190595fce210SBarry Smith . unit - data type 190695fce210SBarry Smith - leafdata - leaf data to gather to roots 190795fce210SBarry Smith 19084165533cSJose E. Roman Output Parameter: 190995fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 191095fce210SBarry Smith 191195fce210SBarry Smith Level: intermediate 191295fce210SBarry Smith 1913cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 191495fce210SBarry Smith @*/ 1915d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1916d71ae5a4SJacob Faibussowitsch { 1917a5526d50SJunchao Zhang PetscSF multi = NULL; 191895fce210SBarry Smith 191995fce210SBarry Smith PetscFunctionBegin; 192095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19219566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 19229566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 19233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 192495fce210SBarry Smith } 192595fce210SBarry Smith 192695fce210SBarry Smith /*@C 1927cab54364SBarry Smith PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 192895fce210SBarry Smith 192995fce210SBarry Smith Collective 193095fce210SBarry Smith 19314165533cSJose E. Roman Input Parameters: 193295fce210SBarry Smith + sf - star forest 193395fce210SBarry Smith . 
unit - data type 193495fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 193595fce210SBarry Smith 19364165533cSJose E. Roman Output Parameter: 193795fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 193895fce210SBarry Smith 193995fce210SBarry Smith Level: intermediate 194095fce210SBarry Smith 194120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 194295fce210SBarry Smith @*/ 1943d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1944d71ae5a4SJacob Faibussowitsch { 1945a5526d50SJunchao Zhang PetscSF multi = NULL; 194695fce210SBarry Smith 194795fce210SBarry Smith PetscFunctionBegin; 194895fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19499566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 19509566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 19519566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 19523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 195395fce210SBarry Smith } 195495fce210SBarry Smith 195595fce210SBarry Smith /*@C 1956cab54364SBarry Smith PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 195795fce210SBarry Smith 195895fce210SBarry Smith Collective 195995fce210SBarry Smith 19604165533cSJose E. Roman Input Parameters: 196195fce210SBarry Smith + sf - star forest 196295fce210SBarry Smith . unit - data type 196395fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 196495fce210SBarry Smith 19654165533cSJose E. Roman Output Parameter: 196695fce210SBarry Smith . 
leafdata - leaf data to be update with personal data from each respective root 196795fce210SBarry Smith 196895fce210SBarry Smith Level: intermediate 196995fce210SBarry Smith 197020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 197195fce210SBarry Smith @*/ 1972d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1973d71ae5a4SJacob Faibussowitsch { 1974a5526d50SJunchao Zhang PetscSF multi = NULL; 197595fce210SBarry Smith 197695fce210SBarry Smith PetscFunctionBegin; 197795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19789566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 19799566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 19803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 198195fce210SBarry Smith } 1982a7b3aa13SAta Mesgarnejad 1983d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1984d71ae5a4SJacob Faibussowitsch { 1985a072220fSLawrence Mitchell PetscInt i, n, nleaves; 1986a072220fSLawrence Mitchell const PetscInt *ilocal = NULL; 1987a072220fSLawrence Mitchell PetscHSetI seen; 1988a072220fSLawrence Mitchell 1989a072220fSLawrence Mitchell PetscFunctionBegin; 1990b458e8f1SJose E. Roman if (PetscDefined(USE_DEBUG)) { 19919566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 19929566063dSJacob Faibussowitsch PetscCall(PetscHSetICreate(&seen)); 1993a072220fSLawrence Mitchell for (i = 0; i < nleaves; i++) { 1994a072220fSLawrence Mitchell const PetscInt leaf = ilocal ? 
ilocal[i] : i; 19959566063dSJacob Faibussowitsch PetscCall(PetscHSetIAdd(seen, leaf)); 1996a072220fSLawrence Mitchell } 19979566063dSJacob Faibussowitsch PetscCall(PetscHSetIGetSize(seen, &n)); 199808401ef6SPierre Jolivet PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 19999566063dSJacob Faibussowitsch PetscCall(PetscHSetIDestroy(&seen)); 2000b458e8f1SJose E. Roman } 20013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2002a072220fSLawrence Mitchell } 200354729392SStefano Zampini 2004a7b3aa13SAta Mesgarnejad /*@ 2005cab54364SBarry Smith PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2006a7b3aa13SAta Mesgarnejad 2007a7b3aa13SAta Mesgarnejad Input Parameters: 2008cab54364SBarry Smith + sfA - The first `PetscSF` 2009cab54364SBarry Smith - sfB - The second `PetscSF` 2010a7b3aa13SAta Mesgarnejad 20112fe279fdSBarry Smith Output Parameter: 2012cab54364SBarry Smith . sfBA - The composite `PetscSF` 2013a7b3aa13SAta Mesgarnejad 2014a7b3aa13SAta Mesgarnejad Level: developer 2015a7b3aa13SAta Mesgarnejad 2016a072220fSLawrence Mitchell Notes: 2017cab54364SBarry Smith Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 201854729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. 201954729392SStefano Zampini 202020662ed9SBarry Smith `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 202120662ed9SBarry Smith a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 202220662ed9SBarry Smith nodes (roots or leaves) are not in `sfBA`. 
Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 202320662ed9SBarry Smith `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 2024a072220fSLawrence Mitchell 2025db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2026a7b3aa13SAta Mesgarnejad @*/ 2027d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2028d71ae5a4SJacob Faibussowitsch { 2029a7b3aa13SAta Mesgarnejad const PetscSFNode *remotePointsA, *remotePointsB; 2030d41018fbSJunchao Zhang PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 203154729392SStefano Zampini const PetscInt *localPointsA, *localPointsB; 203254729392SStefano Zampini PetscInt *localPointsBA; 203354729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 203454729392SStefano Zampini PetscBool denseB; 2035a7b3aa13SAta Mesgarnejad 2036a7b3aa13SAta Mesgarnejad PetscFunctionBegin; 2037a7b3aa13SAta Mesgarnejad PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 203829046d53SLisandro Dalcin PetscSFCheckGraphSet(sfA, 1); 203929046d53SLisandro Dalcin PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 204029046d53SLisandro Dalcin PetscSFCheckGraphSet(sfB, 2); 204154729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2); 20424f572ea9SToby Isaac PetscAssertPointer(sfBA, 3); 20439566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 20449566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 204554729392SStefano Zampini 20469566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 20479566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 204820662ed9SBarry Smith /* 
Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 204920662ed9SBarry Smith numRootsB; otherwise, garbage will be broadcasted. 205020662ed9SBarry Smith Example (comm size = 1): 205120662ed9SBarry Smith sfA: 0 <- (0, 0) 205220662ed9SBarry Smith sfB: 100 <- (0, 0) 205320662ed9SBarry Smith 101 <- (0, 1) 205420662ed9SBarry Smith Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget 205520662ed9SBarry Smith of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would 205620662ed9SBarry Smith receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 205720662ed9SBarry Smith remotePointsA; if not recasted, point 101 would receive a garbage value. */ 20589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 205954729392SStefano Zampini for (i = 0; i < numRootsB; i++) { 206054729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 206154729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 206254729392SStefano Zampini } 206354729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 20640ea77edaSksagiyam PetscInt localp = localPointsA ? 
localPointsA[i] : i; 20650ea77edaSksagiyam 20660ea77edaSksagiyam if (localp >= numRootsB) continue; 20670ea77edaSksagiyam reorderedRemotePointsA[localp] = remotePointsA[i]; 206854729392SStefano Zampini } 2069d41018fbSJunchao Zhang remotePointsA = reorderedRemotePointsA; 20709566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 20719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 20720ea77edaSksagiyam for (i = 0; i < maxleaf - minleaf + 1; i++) { 20730ea77edaSksagiyam leafdataB[i].rank = -1; 20740ea77edaSksagiyam leafdataB[i].index = -1; 20750ea77edaSksagiyam } 20768e3a54c0SPierre Jolivet PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 20778e3a54c0SPierre Jolivet PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 20789566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA)); 2079d41018fbSJunchao Zhang 208054729392SStefano Zampini denseB = (PetscBool)!localPointsB; 208154729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 208254729392SStefano Zampini if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 208354729392SStefano Zampini else numLeavesBA++; 208454729392SStefano Zampini } 208554729392SStefano Zampini if (denseB) { 2086d41018fbSJunchao Zhang localPointsBA = NULL; 2087d41018fbSJunchao Zhang remotePointsBA = leafdataB; 2088d41018fbSJunchao Zhang } else { 20899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 20909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 209154729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 209254729392SStefano Zampini const PetscInt l = localPointsB ? 
localPointsB[i] : i; 209354729392SStefano Zampini 209454729392SStefano Zampini if (leafdataB[l - minleaf].rank == -1) continue; 209554729392SStefano Zampini remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 209654729392SStefano Zampini localPointsBA[numLeavesBA] = l; 209754729392SStefano Zampini numLeavesBA++; 209854729392SStefano Zampini } 20999566063dSJacob Faibussowitsch PetscCall(PetscFree(leafdataB)); 2100d41018fbSJunchao Zhang } 21019566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 21029566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA)); 21039566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 21043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2105a7b3aa13SAta Mesgarnejad } 21061c6ba672SJunchao Zhang 210704c0ada0SJunchao Zhang /*@ 2108cab54364SBarry Smith PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 210904c0ada0SJunchao Zhang 211004c0ada0SJunchao Zhang Input Parameters: 2111cab54364SBarry Smith + sfA - The first `PetscSF` 2112cab54364SBarry Smith - sfB - The second `PetscSF` 211304c0ada0SJunchao Zhang 21142fe279fdSBarry Smith Output Parameter: 2115cab54364SBarry Smith . sfBA - The composite `PetscSF`. 211604c0ada0SJunchao Zhang 211704c0ada0SJunchao Zhang Level: developer 211804c0ada0SJunchao Zhang 211954729392SStefano Zampini Notes: 212020662ed9SBarry Smith Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 212154729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 212220662ed9SBarry Smith second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 
212354729392SStefano Zampini 212420662ed9SBarry Smith `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 212520662ed9SBarry Smith a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 212620662ed9SBarry Smith roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 212720662ed9SBarry Smith on `sfA`, then 212820662ed9SBarry Smith a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 212954729392SStefano Zampini 2130db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 213104c0ada0SJunchao Zhang @*/ 2132d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2133d71ae5a4SJacob Faibussowitsch { 213404c0ada0SJunchao Zhang const PetscSFNode *remotePointsA, *remotePointsB; 213504c0ada0SJunchao Zhang PetscSFNode *remotePointsBA; 213604c0ada0SJunchao Zhang const PetscInt *localPointsA, *localPointsB; 213754729392SStefano Zampini PetscSFNode *reorderedRemotePointsA = NULL; 213854729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 21395b0d146aSStefano Zampini MPI_Op op; 21405b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 21415b0d146aSStefano Zampini PetscBool iswin; 21425b0d146aSStefano Zampini #endif 214304c0ada0SJunchao Zhang 214404c0ada0SJunchao Zhang PetscFunctionBegin; 214504c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 214604c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfA, 1); 214704c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 214804c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfB, 2); 214954729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2); 21504f572ea9SToby Isaac 
PetscAssertPointer(sfBA, 3); 21519566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 21529566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 215354729392SStefano Zampini 21549566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 21559566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 21565b0d146aSStefano Zampini 21575b0d146aSStefano Zampini /* TODO: Check roots of sfB have degree of 1 */ 21585b0d146aSStefano Zampini /* Once we implement it, we can replace the MPI_MAXLOC 215983df288dSJunchao Zhang with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 21605b0d146aSStefano Zampini We use MPI_MAXLOC only to have a deterministic output from this routine if 21615b0d146aSStefano Zampini the root condition is not meet. 21625b0d146aSStefano Zampini */ 21635b0d146aSStefano Zampini op = MPI_MAXLOC; 21645b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 21655b0d146aSStefano Zampini /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 21669566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 216783df288dSJunchao Zhang if (iswin) op = MPI_REPLACE; 21685b0d146aSStefano Zampini #endif 21695b0d146aSStefano Zampini 21709566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 21719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 217254729392SStefano Zampini for (i = 0; i < maxleaf - minleaf + 1; i++) { 217354729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 217454729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 217554729392SStefano Zampini } 217654729392SStefano Zampini if (localPointsA) { 217754729392SStefano Zampini for (i = 
0; i < numLeavesA; i++) { 217854729392SStefano Zampini if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 217954729392SStefano Zampini reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 218054729392SStefano Zampini } 218154729392SStefano Zampini } else { 218254729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 218354729392SStefano Zampini if (i > maxleaf || i < minleaf) continue; 218454729392SStefano Zampini reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 218554729392SStefano Zampini } 218654729392SStefano Zampini } 218754729392SStefano Zampini 21889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 21899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 219054729392SStefano Zampini for (i = 0; i < numRootsB; i++) { 219154729392SStefano Zampini remotePointsBA[i].rank = -1; 219254729392SStefano Zampini remotePointsBA[i].index = -1; 219354729392SStefano Zampini } 219454729392SStefano Zampini 21958e3a54c0SPierre Jolivet PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 21968e3a54c0SPierre Jolivet PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 21979566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA)); 219854729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 219954729392SStefano Zampini if (remotePointsBA[i].rank == -1) continue; 220054729392SStefano Zampini remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 220154729392SStefano Zampini remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 220254729392SStefano Zampini localPointsBA[numLeavesBA] = i; 220354729392SStefano Zampini numLeavesBA++; 220454729392SStefano Zampini } 22059566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 22069566063dSJacob 
Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA)); 22079566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 22083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 220904c0ada0SJunchao Zhang } 221004c0ada0SJunchao Zhang 22111c6ba672SJunchao Zhang /* 2212cab54364SBarry Smith PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 22131c6ba672SJunchao Zhang 22142fe279fdSBarry Smith Input Parameter: 2215cab54364SBarry Smith . sf - The global `PetscSF` 22161c6ba672SJunchao Zhang 22172fe279fdSBarry Smith Output Parameter: 2218cab54364SBarry Smith . out - The local `PetscSF` 2219cab54364SBarry Smith 2220cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()` 22211c6ba672SJunchao Zhang */ 2222d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2223d71ae5a4SJacob Faibussowitsch { 22241c6ba672SJunchao Zhang MPI_Comm comm; 22251c6ba672SJunchao Zhang PetscMPIInt myrank; 22261c6ba672SJunchao Zhang const PetscInt *ilocal; 22271c6ba672SJunchao Zhang const PetscSFNode *iremote; 22281c6ba672SJunchao Zhang PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 22291c6ba672SJunchao Zhang PetscSFNode *liremote; 22301c6ba672SJunchao Zhang PetscSF lsf; 22311c6ba672SJunchao Zhang 22321c6ba672SJunchao Zhang PetscFunctionBegin; 22331c6ba672SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2234dbbe0bcdSBarry Smith if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2235dbbe0bcdSBarry Smith else { 22361c6ba672SJunchao Zhang /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 22379566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 22389566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 22391c6ba672SJunchao Zhang 
22401c6ba672SJunchao Zhang /* Find out local edges and build a local SF */ 22419566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 22429371c9d4SSatish Balay for (i = lnleaves = 0; i < nleaves; i++) { 22439371c9d4SSatish Balay if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 22449371c9d4SSatish Balay } 22459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &lilocal)); 22469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &liremote)); 22471c6ba672SJunchao Zhang 22481c6ba672SJunchao Zhang for (i = j = 0; i < nleaves; i++) { 22491c6ba672SJunchao Zhang if (iremote[i].rank == (PetscInt)myrank) { 22501c6ba672SJunchao Zhang lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 22511c6ba672SJunchao Zhang liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 22521c6ba672SJunchao Zhang liremote[j].index = iremote[i].index; 22531c6ba672SJunchao Zhang j++; 22541c6ba672SJunchao Zhang } 22551c6ba672SJunchao Zhang } 22569566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 22579566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(lsf)); 22589566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 22599566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 22601c6ba672SJunchao Zhang *out = lsf; 22611c6ba672SJunchao Zhang } 22623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22631c6ba672SJunchao Zhang } 2264dd5b3ca6SJunchao Zhang 2265dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2266d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2267d71ae5a4SJacob Faibussowitsch { 2268eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 2269dd5b3ca6SJunchao Zhang 2270dd5b3ca6SJunchao Zhang PetscFunctionBegin; 2271dd5b3ca6SJunchao 
Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 22729566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 22739566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 22749566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 22759566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2276dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 22779566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 22783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2279dd5b3ca6SJunchao Zhang } 2280dd5b3ca6SJunchao Zhang 2281157edd7aSVaclav Hapla /*@ 2282cab54364SBarry Smith PetscSFConcatenate - concatenate multiple `PetscSF` into one 2283157edd7aSVaclav Hapla 2284157edd7aSVaclav Hapla Input Parameters: 2285157edd7aSVaclav Hapla + comm - the communicator 2286cab54364SBarry Smith . nsfs - the number of input `PetscSF` 2287cab54364SBarry Smith . sfs - the array of input `PetscSF` 22881f40158dSVaclav Hapla . rootMode - the root mode specifying how roots are handled 228920662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2290157edd7aSVaclav Hapla 22912fe279fdSBarry Smith Output Parameter: 2292cab54364SBarry Smith . newsf - The resulting `PetscSF` 2293157edd7aSVaclav Hapla 22941f40158dSVaclav Hapla Level: advanced 2295157edd7aSVaclav Hapla 2296157edd7aSVaclav Hapla Notes: 229720662ed9SBarry Smith The communicator of all `PetscSF`s in `sfs` must be comm. 2298157edd7aSVaclav Hapla 229920662ed9SBarry Smith Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 230020662ed9SBarry Smith 230120662ed9SBarry Smith The offsets in `leafOffsets` are added to the original leaf indices. 
230220662ed9SBarry Smith 230320662ed9SBarry Smith If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 230420662ed9SBarry Smith In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 230520662ed9SBarry Smith 230620662ed9SBarry Smith If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2307157edd7aSVaclav Hapla In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2308157edd7aSVaclav Hapla 230920662ed9SBarry Smith All root modes retain the essential connectivity condition. 231020662ed9SBarry Smith If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 231120662ed9SBarry Smith Parameter `rootMode` controls how the input root spaces are combined. 231220662ed9SBarry Smith For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 231320662ed9SBarry Smith and is also the same in the output `PetscSF`. 23141f40158dSVaclav Hapla For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 23151f40158dSVaclav Hapla `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 231620662ed9SBarry Smith roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 23171f40158dSVaclav Hapla `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 23181593df67SStefano Zampini roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 23191f40158dSVaclav Hapla the original root ranks are ignored. 
23201f40158dSVaclav Hapla For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 232120662ed9SBarry Smith the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 232220662ed9SBarry Smith to keep the load balancing. 232320662ed9SBarry Smith However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 23241f40158dSVaclav Hapla 23251f40158dSVaclav Hapla Example: 23261f40158dSVaclav Hapla We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 232720662ed9SBarry Smith .vb 232820662ed9SBarry Smith make -C $PETSC_DIR/src/vec/is/sf/tests ex18 232920662ed9SBarry Smith for m in {local,global,shared}; do 233020662ed9SBarry Smith mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 233120662ed9SBarry Smith done 233220662ed9SBarry Smith .ve 233320662ed9SBarry Smith we generate two identical `PetscSF`s sf_0 and sf_1, 233420662ed9SBarry Smith .vb 233520662ed9SBarry Smith PetscSF Object: sf_0 2 MPI processes 233620662ed9SBarry Smith type: basic 233720662ed9SBarry Smith rank #leaves #roots 233820662ed9SBarry Smith [ 0] 4 2 233920662ed9SBarry Smith [ 1] 4 2 234020662ed9SBarry Smith leaves roots roots in global numbering 234120662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 234220662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 234320662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 2 234420662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 3 234520662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 234620662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 234720662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 2 234820662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 3 234920662ed9SBarry Smith .ve 2350e33f79d8SJacob Faibussowitsch and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 235120662ed9SBarry Smith .vb 235220662ed9SBarry Smith rootMode = local: 235320662ed9SBarry Smith PetscSF Object: 
result_sf 2 MPI processes 235420662ed9SBarry Smith type: basic 235520662ed9SBarry Smith rank #leaves #roots 235620662ed9SBarry Smith [ 0] 8 4 235720662ed9SBarry Smith [ 1] 8 4 235820662ed9SBarry Smith leaves roots roots in global numbering 235920662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 236020662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 236120662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 4 236220662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 5 236320662ed9SBarry Smith ( 0, 4) <- ( 0, 2) = 2 236420662ed9SBarry Smith ( 0, 5) <- ( 0, 3) = 3 236520662ed9SBarry Smith ( 0, 6) <- ( 1, 2) = 6 236620662ed9SBarry Smith ( 0, 7) <- ( 1, 3) = 7 236720662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 236820662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 236920662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 4 237020662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 5 237120662ed9SBarry Smith ( 1, 4) <- ( 0, 2) = 2 237220662ed9SBarry Smith ( 1, 5) <- ( 0, 3) = 3 237320662ed9SBarry Smith ( 1, 6) <- ( 1, 2) = 6 237420662ed9SBarry Smith ( 1, 7) <- ( 1, 3) = 7 237520662ed9SBarry Smith 237620662ed9SBarry Smith rootMode = global: 237720662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes 237820662ed9SBarry Smith type: basic 237920662ed9SBarry Smith rank #leaves #roots 238020662ed9SBarry Smith [ 0] 8 4 238120662ed9SBarry Smith [ 1] 8 4 238220662ed9SBarry Smith leaves roots roots in global numbering 238320662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 238420662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 238520662ed9SBarry Smith ( 0, 2) <- ( 0, 2) = 2 238620662ed9SBarry Smith ( 0, 3) <- ( 0, 3) = 3 238720662ed9SBarry Smith ( 0, 4) <- ( 1, 0) = 4 238820662ed9SBarry Smith ( 0, 5) <- ( 1, 1) = 5 238920662ed9SBarry Smith ( 0, 6) <- ( 1, 2) = 6 239020662ed9SBarry Smith ( 0, 7) <- ( 1, 3) = 7 239120662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 239220662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 239320662ed9SBarry Smith ( 1, 2) <- ( 0, 2) = 2 239420662ed9SBarry Smith ( 1, 3) <- ( 0, 3) = 3 239520662ed9SBarry Smith ( 1, 4) <- ( 1, 0) = 4 
239620662ed9SBarry Smith ( 1, 5) <- ( 1, 1) = 5 239720662ed9SBarry Smith ( 1, 6) <- ( 1, 2) = 6 239820662ed9SBarry Smith ( 1, 7) <- ( 1, 3) = 7 239920662ed9SBarry Smith 240020662ed9SBarry Smith rootMode = shared: 240120662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes 240220662ed9SBarry Smith type: basic 240320662ed9SBarry Smith rank #leaves #roots 240420662ed9SBarry Smith [ 0] 8 2 240520662ed9SBarry Smith [ 1] 8 2 240620662ed9SBarry Smith leaves roots roots in global numbering 240720662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 240820662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 240920662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 2 241020662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 3 241120662ed9SBarry Smith ( 0, 4) <- ( 0, 0) = 0 241220662ed9SBarry Smith ( 0, 5) <- ( 0, 1) = 1 241320662ed9SBarry Smith ( 0, 6) <- ( 1, 0) = 2 241420662ed9SBarry Smith ( 0, 7) <- ( 1, 1) = 3 241520662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 241620662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 241720662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 2 241820662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 3 241920662ed9SBarry Smith ( 1, 4) <- ( 0, 0) = 0 242020662ed9SBarry Smith ( 1, 5) <- ( 0, 1) = 1 242120662ed9SBarry Smith ( 1, 6) <- ( 1, 0) = 2 242220662ed9SBarry Smith ( 1, 7) <- ( 1, 1) = 3 242320662ed9SBarry Smith .ve 24241f40158dSVaclav Hapla 24251f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2426157edd7aSVaclav Hapla @*/ 24271f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2428d71ae5a4SJacob Faibussowitsch { 2429157edd7aSVaclav Hapla PetscInt i, s, nLeaves, nRoots; 2430157edd7aSVaclav Hapla PetscInt *leafArrayOffsets; 2431157edd7aSVaclav Hapla PetscInt *ilocal_new; 2432157edd7aSVaclav Hapla PetscSFNode *iremote_new; 2433157edd7aSVaclav Hapla PetscBool all_ilocal_null = PETSC_FALSE; 
24341f40158dSVaclav Hapla PetscLayout glayout = NULL; 24351f40158dSVaclav Hapla PetscInt *gremote = NULL; 24361f40158dSVaclav Hapla PetscMPIInt rank, size; 2437157edd7aSVaclav Hapla 2438157edd7aSVaclav Hapla PetscFunctionBegin; 243912f479c1SVaclav Hapla if (PetscDefined(USE_DEBUG)) { 2440157edd7aSVaclav Hapla PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2441157edd7aSVaclav Hapla 24429566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &dummy)); 2443157edd7aSVaclav Hapla PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 24444f572ea9SToby Isaac PetscAssertPointer(sfs, 3); 2445157edd7aSVaclav Hapla for (i = 0; i < nsfs; i++) { 2446157edd7aSVaclav Hapla PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2447157edd7aSVaclav Hapla PetscCheckSameComm(dummy, 1, sfs[i], 3); 2448157edd7aSVaclav Hapla } 24491f40158dSVaclav Hapla PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 24504f572ea9SToby Isaac if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 24514f572ea9SToby Isaac PetscAssertPointer(newsf, 6); 24529566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&dummy)); 2453157edd7aSVaclav Hapla } 2454157edd7aSVaclav Hapla if (!nsfs) { 24559566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf)); 24569566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 24573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2458157edd7aSVaclav Hapla } 24599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 24601f40158dSVaclav Hapla PetscCallMPI(MPI_Comm_size(comm, &size)); 2461157edd7aSVaclav Hapla 24621f40158dSVaclav Hapla /* Calculate leaf array offsets */ 24639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2464157edd7aSVaclav Hapla leafArrayOffsets[0] = 0; 2465157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2466157edd7aSVaclav Hapla PetscInt nl; 2467157edd7aSVaclav Hapla 24689566063dSJacob 
Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2469157edd7aSVaclav Hapla leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2470157edd7aSVaclav Hapla } 2471157edd7aSVaclav Hapla nLeaves = leafArrayOffsets[nsfs]; 2472157edd7aSVaclav Hapla 24731f40158dSVaclav Hapla /* Calculate number of roots */ 24741f40158dSVaclav Hapla switch (rootMode) { 24751f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 24761f40158dSVaclav Hapla PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 24771f40158dSVaclav Hapla if (PetscDefined(USE_DEBUG)) { 24781f40158dSVaclav Hapla for (s = 1; s < nsfs; s++) { 24791f40158dSVaclav Hapla PetscInt nr; 24801f40158dSVaclav Hapla 24811f40158dSVaclav Hapla PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 24821f40158dSVaclav Hapla PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 24831f40158dSVaclav Hapla } 24841f40158dSVaclav Hapla } 24851f40158dSVaclav Hapla } break; 24861f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 24871f40158dSVaclav Hapla /* Calculate also global layout in this case */ 24881f40158dSVaclav Hapla PetscInt *nls; 24891f40158dSVaclav Hapla PetscLayout *lts; 24901f40158dSVaclav Hapla PetscInt **inds; 24911f40158dSVaclav Hapla PetscInt j; 24921f40158dSVaclav Hapla PetscInt rootOffset = 0; 24931f40158dSVaclav Hapla 24941f40158dSVaclav Hapla PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 24951f40158dSVaclav Hapla PetscCall(PetscLayoutCreate(comm, &glayout)); 24961f40158dSVaclav Hapla glayout->bs = 1; 24971f40158dSVaclav Hapla glayout->n = 0; 24981f40158dSVaclav Hapla glayout->N = 0; 24991f40158dSVaclav Hapla for (s = 0; s < nsfs; s++) { 25001f40158dSVaclav Hapla PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 25011f40158dSVaclav Hapla 
glayout->n += lts[s]->n; 25021f40158dSVaclav Hapla glayout->N += lts[s]->N; 25031f40158dSVaclav Hapla } 25041f40158dSVaclav Hapla PetscCall(PetscLayoutSetUp(glayout)); 25051f40158dSVaclav Hapla PetscCall(PetscMalloc1(nLeaves, &gremote)); 25061f40158dSVaclav Hapla for (s = 0, j = 0; s < nsfs; s++) { 25071f40158dSVaclav Hapla for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 25081f40158dSVaclav Hapla rootOffset += lts[s]->N; 25091f40158dSVaclav Hapla PetscCall(PetscLayoutDestroy(<s[s])); 25101f40158dSVaclav Hapla PetscCall(PetscFree(inds[s])); 25111f40158dSVaclav Hapla } 25121f40158dSVaclav Hapla PetscCall(PetscFree3(lts, nls, inds)); 25131f40158dSVaclav Hapla nRoots = glayout->N; 25141f40158dSVaclav Hapla } break; 25151f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 25161f40158dSVaclav Hapla /* nRoots calculated later in this case */ 25171f40158dSVaclav Hapla break; 25181f40158dSVaclav Hapla default: 25191f40158dSVaclav Hapla SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 25201f40158dSVaclav Hapla } 25211f40158dSVaclav Hapla 2522157edd7aSVaclav Hapla if (!leafOffsets) { 2523157edd7aSVaclav Hapla all_ilocal_null = PETSC_TRUE; 2524157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2525157edd7aSVaclav Hapla const PetscInt *ilocal; 2526157edd7aSVaclav Hapla 25279566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2528157edd7aSVaclav Hapla if (ilocal) { 2529157edd7aSVaclav Hapla all_ilocal_null = PETSC_FALSE; 2530157edd7aSVaclav Hapla break; 2531157edd7aSVaclav Hapla } 2532157edd7aSVaclav Hapla } 2533157edd7aSVaclav Hapla PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2534157edd7aSVaclav Hapla } 2535157edd7aSVaclav Hapla 2536157edd7aSVaclav Hapla /* Renumber and concatenate local leaves */ 2537157edd7aSVaclav Hapla ilocal_new = NULL; 2538157edd7aSVaclav Hapla if 
(!all_ilocal_null) { 25399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2540157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2541157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2542157edd7aSVaclav Hapla const PetscInt *ilocal; 25438e3a54c0SPierre Jolivet PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2544157edd7aSVaclav Hapla PetscInt i, nleaves_l; 2545157edd7aSVaclav Hapla 25469566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2547157edd7aSVaclav Hapla for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2548157edd7aSVaclav Hapla } 2549157edd7aSVaclav Hapla } 2550157edd7aSVaclav Hapla 2551157edd7aSVaclav Hapla /* Renumber and concatenate remote roots */ 25521f40158dSVaclav Hapla if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 25531f40158dSVaclav Hapla PetscInt rootOffset = 0; 25541f40158dSVaclav Hapla 25559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2556157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) { 2557157edd7aSVaclav Hapla iremote_new[i].rank = -1; 2558157edd7aSVaclav Hapla iremote_new[i].index = -1; 2559157edd7aSVaclav Hapla } 2560157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2561157edd7aSVaclav Hapla PetscInt i, nl, nr; 2562157edd7aSVaclav Hapla PetscSF tmp_sf; 2563157edd7aSVaclav Hapla const PetscSFNode *iremote; 2564157edd7aSVaclav Hapla PetscSFNode *tmp_rootdata; 25658e3a54c0SPierre Jolivet PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2566157edd7aSVaclav Hapla 25679566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 25689566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &tmp_sf)); 2569157edd7aSVaclav Hapla /* create helper SF with contiguous leaves */ 25709566063dSJacob Faibussowitsch 
PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 25719566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(tmp_sf)); 25729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 25731f40158dSVaclav Hapla if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2574157edd7aSVaclav Hapla for (i = 0; i < nr; i++) { 25751f40158dSVaclav Hapla tmp_rootdata[i].index = i + rootOffset; 2576157edd7aSVaclav Hapla tmp_rootdata[i].rank = (PetscInt)rank; 2577157edd7aSVaclav Hapla } 25781f40158dSVaclav Hapla rootOffset += nr; 25791f40158dSVaclav Hapla } else { 25801f40158dSVaclav Hapla for (i = 0; i < nr; i++) { 25811f40158dSVaclav Hapla tmp_rootdata[i].index = i; 25821f40158dSVaclav Hapla tmp_rootdata[i].rank = (PetscInt)rank; 25831f40158dSVaclav Hapla } 25841f40158dSVaclav Hapla } 25859566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 25869566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 25879566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&tmp_sf)); 25889566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp_rootdata)); 2589157edd7aSVaclav Hapla } 2590aa624791SPierre Jolivet if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2591157edd7aSVaclav Hapla 2592157edd7aSVaclav Hapla /* Build the new SF */ 25939566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf)); 25949566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 25951f40158dSVaclav Hapla } else { 25961f40158dSVaclav Hapla /* Build the new SF */ 25971f40158dSVaclav Hapla PetscCall(PetscSFCreate(comm, newsf)); 25981f40158dSVaclav Hapla PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 
25991f40158dSVaclav Hapla } 26009566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(*newsf)); 26011f40158dSVaclav Hapla PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 26021f40158dSVaclav Hapla PetscCall(PetscLayoutDestroy(&glayout)); 26031f40158dSVaclav Hapla PetscCall(PetscFree(gremote)); 26049566063dSJacob Faibussowitsch PetscCall(PetscFree(leafArrayOffsets)); 26053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2606157edd7aSVaclav Hapla } 26078e54d7e8SToby Isaac 26088e54d7e8SToby Isaac /*@ 26098e54d7e8SToby Isaac PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 26108e54d7e8SToby Isaac 26118e54d7e8SToby Isaac Collective 26128e54d7e8SToby Isaac 26138e54d7e8SToby Isaac Input Parameters: 26148e54d7e8SToby Isaac + sf - star forest 26158e54d7e8SToby Isaac . unit - the data type contained within the root and leaf data 26168e54d7e8SToby Isaac . rootdata - root data that will be used for multiple PetscSF communications 26178e54d7e8SToby Isaac - leafdata - leaf data that will be used for multiple PetscSF communications 26188e54d7e8SToby Isaac 26198e54d7e8SToby Isaac Level: advanced 26208e54d7e8SToby Isaac 26218e54d7e8SToby Isaac Notes: 26228e54d7e8SToby Isaac Implementations of `PetscSF` can make optimizations 26238e54d7e8SToby Isaac for repeated communication using the same memory regions, but these optimizations 26248e54d7e8SToby Isaac can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 
26258e54d7e8SToby Isaac The intended pattern is 26268e54d7e8SToby Isaac 26278e54d7e8SToby Isaac .vb 26288e54d7e8SToby Isaac PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 26298e54d7e8SToby Isaac 26308e54d7e8SToby Isaac PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 26318e54d7e8SToby Isaac // repeated use of rootdata and leafdata will now be optimized 26328e54d7e8SToby Isaac 26338e54d7e8SToby Isaac PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 26348e54d7e8SToby Isaac PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 26358e54d7e8SToby Isaac // ... 26368e54d7e8SToby Isaac PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 26378e54d7e8SToby Isaac PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 26388e54d7e8SToby Isaac // ... (other communications) 26398e54d7e8SToby Isaac 26408e54d7e8SToby Isaac // rootdata and leafdata must be deregistered before freeing 26418e54d7e8SToby Isaac // skipping this can lead to undefined behavior including 26428e54d7e8SToby Isaac // deadlocks 26438e54d7e8SToby Isaac PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 26448e54d7e8SToby Isaac 26458e54d7e8SToby Isaac // it is now safe to free rootdata and leafdata 26468e54d7e8SToby Isaac PetscFree2(rootdata, leafdata); 26478e54d7e8SToby Isaac .ve 26488e54d7e8SToby Isaac 26498e54d7e8SToby Isaac If you do not register `rootdata` and `leafdata` it will not cause an error, 26508e54d7e8SToby Isaac but optimizations that reduce the setup time for each communication cannot be 26518e54d7e8SToby Isaac made. Currently, the only implementation of `PetscSF` that benefits from 26528e54d7e8SToby Isaac `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 26538e54d7e8SToby Isaac `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 
26548e54d7e8SToby Isaac 26558e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 26568e54d7e8SToby Isaac @*/ 26578e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 26588e54d7e8SToby Isaac { 26598e54d7e8SToby Isaac PetscFunctionBegin; 26608e54d7e8SToby Isaac PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 26618e54d7e8SToby Isaac PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 26628e54d7e8SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 26638e54d7e8SToby Isaac } 26648e54d7e8SToby Isaac 26658e54d7e8SToby Isaac /*@ 26668e54d7e8SToby Isaac PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded. 26678e54d7e8SToby Isaac 26688e54d7e8SToby Isaac Collective 26698e54d7e8SToby Isaac 26708e54d7e8SToby Isaac Input Parameters: 26718e54d7e8SToby Isaac + sf - star forest 26728e54d7e8SToby Isaac . unit - the data type contained within the root and leaf data 26738e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 26748e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 26758e54d7e8SToby Isaac 26768e54d7e8SToby Isaac Level: advanced 26778e54d7e8SToby Isaac 26788e54d7e8SToby Isaac Note: 26798e54d7e8SToby Isaac See `PetscSFRegisterPersistent()` for when/how to use this function. 
26808e54d7e8SToby Isaac 26818e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 26828e54d7e8SToby Isaac @*/ 26838e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 26848e54d7e8SToby Isaac { 26858e54d7e8SToby Isaac PetscFunctionBegin; 26868e54d7e8SToby Isaac PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 26878e54d7e8SToby Isaac PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 26888e54d7e8SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 26898e54d7e8SToby Isaac } 2690*e1187f0dSToby Isaac 2691*e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2692*e1187f0dSToby Isaac { 2693*e1187f0dSToby Isaac MPI_Aint lb, lb_true, bytes, bytes_true; 2694*e1187f0dSToby Isaac 2695*e1187f0dSToby Isaac PetscFunctionBegin; 2696*e1187f0dSToby Isaac PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2697*e1187f0dSToby Isaac PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2698*e1187f0dSToby Isaac PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2699*e1187f0dSToby Isaac *size = bytes; 2700*e1187f0dSToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 2701*e1187f0dSToby Isaac } 2702