1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h> 595fce210SBarry Smith 67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 77fd2d3dbSJunchao Zhang #include <cuda_runtime.h> 8715b587bSJunchao Zhang #include <petscdevice_cuda.h> 97fd2d3dbSJunchao Zhang #endif 107fd2d3dbSJunchao Zhang 117fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 127fd2d3dbSJunchao Zhang #include <hip/hip_runtime.h> 137fd2d3dbSJunchao Zhang #endif 147fd2d3dbSJunchao Zhang 152abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER) 164bf303faSJacob Faibussowitsch extern void PetscSFCheckGraphSet(PetscSF, int); 172abc8c78SJacob Faibussowitsch #else 1895fce210SBarry Smith #if defined(PETSC_USE_DEBUG) 19a8f51744SPierre Jolivet #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 2095fce210SBarry Smith #else 219371c9d4SSatish Balay #define PetscSFCheckGraphSet(sf, arg) \ 229371c9d4SSatish Balay do { \ 239371c9d4SSatish Balay } while (0) 2495fce210SBarry Smith #endif 252abc8c78SJacob Faibussowitsch #endif 2695fce210SBarry Smith 274c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 281f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 2995fce210SBarry Smith 308af6ec1cSBarry Smith /*@ 3195fce210SBarry Smith PetscSFCreate - create a star forest communication context 3295fce210SBarry Smith 33d083f849SBarry Smith Collective 3495fce210SBarry Smith 354165533cSJose E. Roman Input Parameter: 3695fce210SBarry Smith . comm - communicator on which the star forest will operate 3795fce210SBarry Smith 384165533cSJose E. Roman Output Parameter: 3995fce210SBarry Smith . sf - new star forest context 4095fce210SBarry Smith 4120662ed9SBarry Smith Options Database Key: 426677b1c1SJunchao Zhang + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 436677b1c1SJunchao Zhang . -sf_type window - Use MPI-3 one-sided window for communication 446677b1c1SJunchao Zhang . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 456677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46dd5b3ca6SJunchao Zhang 4795fce210SBarry Smith Level: intermediate 4895fce210SBarry Smith 49cab54364SBarry Smith Note: 50cab54364SBarry Smith When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51cab54364SBarry Smith `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 5220662ed9SBarry Smith `SF`s are optimized and they have better performance than the general `SF`s. 53dd5b3ca6SJunchao Zhang 5438b5cf2dSJacob Faibussowitsch .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 5595fce210SBarry Smith @*/ 56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57d71ae5a4SJacob Faibussowitsch { 5895fce210SBarry Smith PetscSF b; 5995fce210SBarry Smith 6095fce210SBarry Smith PetscFunctionBegin; 614f572ea9SToby Isaac PetscAssertPointer(sf, 2); 629566063dSJacob Faibussowitsch PetscCall(PetscSFInitializePackage()); 6395fce210SBarry Smith 649566063dSJacob Faibussowitsch PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 6595fce210SBarry Smith b->nroots = -1; 6695fce210SBarry Smith b->nleaves = -1; 671690c2aeSBarry Smith b->minleaf = PETSC_INT_MAX; 681690c2aeSBarry Smith b->maxleaf = PETSC_INT_MIN; 6995fce210SBarry Smith b->nranks = -1; 7095fce210SBarry Smith b->rankorder = PETSC_TRUE; 7195fce210SBarry Smith b->ingroup = MPI_GROUP_NULL; 7295fce210SBarry Smith b->outgroup = MPI_GROUP_NULL; 7395fce210SBarry Smith b->graphset = PETSC_FALSE; 7420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 7520c24465SJunchao Zhang b->use_gpu_aware_mpi = use_gpu_aware_mpi; 7620c24465SJunchao Zhang b->use_stream_aware_mpi = PETSC_FALSE; 7771438e86SJunchao Zhang b->unknown_input_stream = PETSC_FALSE; 7827f636e8SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 7920c24465SJunchao Zhang b->backend = PETSCSF_BACKEND_KOKKOS; 8027f636e8SJunchao Zhang #elif defined(PETSC_HAVE_CUDA) 8127f636e8SJunchao Zhang b->backend = PETSCSF_BACKEND_CUDA; 8259af0bd3SScott Kruger #elif defined(PETSC_HAVE_HIP) 8359af0bd3SScott Kruger b->backend = PETSCSF_BACKEND_HIP; 8420c24465SJunchao Zhang #endif 8571438e86SJunchao Zhang 8671438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 8771438e86SJunchao Zhang b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 8871438e86SJunchao Zhang b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 899566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 909566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 9171438e86SJunchao Zhang #endif 9220c24465SJunchao Zhang #endif 9360c22052SBarry Smith b->vscat.from_n = -1; 9460c22052SBarry Smith b->vscat.to_n = -1; 9560c22052SBarry Smith b->vscat.unit = MPIU_SCALAR; 9695fce210SBarry Smith *sf = b; 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9895fce210SBarry Smith } 9995fce210SBarry Smith 10029046d53SLisandro Dalcin /*@ 10195fce210SBarry Smith PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 10295fce210SBarry Smith 10395fce210SBarry Smith Collective 10495fce210SBarry Smith 1054165533cSJose E. Roman Input Parameter: 10695fce210SBarry Smith . sf - star forest 10795fce210SBarry Smith 10895fce210SBarry Smith Level: advanced 10995fce210SBarry Smith 110cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 11195fce210SBarry Smith @*/ 112d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf) 113d71ae5a4SJacob Faibussowitsch { 11495fce210SBarry Smith PetscFunctionBegin; 11595fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Reset); 1170dd791a8SStefano Zampini PetscCall(PetscSFDestroy(&sf->rankssf)); 1180dd791a8SStefano Zampini 11929046d53SLisandro Dalcin sf->nroots = -1; 12029046d53SLisandro Dalcin sf->nleaves = -1; 1211690c2aeSBarry Smith sf->minleaf = PETSC_INT_MAX; 1221690c2aeSBarry Smith sf->maxleaf = PETSC_INT_MIN; 12395fce210SBarry Smith sf->mine = NULL; 12495fce210SBarry Smith sf->remote = NULL; 12529046d53SLisandro Dalcin sf->graphset = PETSC_FALSE; 1269566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->mine_alloc)); 1279566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->remote_alloc)); 12821c688dcSJed Brown sf->nranks = -1; 1299566063dSJacob Faibussowitsch PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 13029046d53SLisandro Dalcin sf->degreeknown = PETSC_FALSE; 1319566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degree)); 1329566063dSJacob Faibussowitsch if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 1339566063dSJacob Faibussowitsch if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 1340dd791a8SStefano Zampini 135013b3241SStefano Zampini if (sf->multi) sf->multi->multi = NULL; 1369566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf->multi)); 1370dd791a8SStefano Zampini 1389566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sf->map)); 13971438e86SJunchao Zhang 14071438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 1419566063dSJacob Faibussowitsch for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 14271438e86SJunchao Zhang #endif 14371438e86SJunchao Zhang 14495fce210SBarry Smith sf->setupcalled = PETSC_FALSE; 1453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14695fce210SBarry Smith } 14795fce210SBarry Smith 148cc4c1da9SBarry Smith /*@ 149cab54364SBarry Smith PetscSFSetType - Set the `PetscSF` communication implementation 15095fce210SBarry Smith 151c3339decSBarry Smith Collective 15295fce210SBarry Smith 15395fce210SBarry Smith Input Parameters: 154cab54364SBarry Smith + sf - the `PetscSF` context 15595fce210SBarry Smith - type - a known method 156cab54364SBarry Smith .vb 157cab54364SBarry Smith PETSCSFWINDOW - MPI-2/3 one-sided 158cab54364SBarry Smith PETSCSFBASIC - basic implementation using MPI-1 two-sided 159cab54364SBarry Smith .ve 16095fce210SBarry Smith 16195fce210SBarry Smith Options Database Key: 16220662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163cab54364SBarry Smith 164cab54364SBarry Smith Level: intermediate 16595fce210SBarry Smith 16695fce210SBarry Smith Notes: 16720662ed9SBarry Smith See `PetscSFType` for possible values 16895fce210SBarry Smith 16920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 17095fce210SBarry Smith @*/ 171d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172d71ae5a4SJacob Faibussowitsch { 17395fce210SBarry Smith PetscBool match; 1745f80ce2aSJacob Faibussowitsch PetscErrorCode (*r)(PetscSF); 17595fce210SBarry Smith 17695fce210SBarry Smith PetscFunctionBegin; 17795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1784f572ea9SToby Isaac PetscAssertPointer(type, 2); 17995fce210SBarry Smith 1809566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 1813ba16761SJacob Faibussowitsch if (match) PetscFunctionReturn(PETSC_SUCCESS); 18295fce210SBarry Smith 1839566063dSJacob Faibussowitsch PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 1846adde796SStefano Zampini PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 18529046d53SLisandro Dalcin /* Destroy the previous PetscSF implementation context */ 186dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Destroy); 1879566063dSJacob Faibussowitsch PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 1889566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 1899566063dSJacob Faibussowitsch PetscCall((*r)(sf)); 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19195fce210SBarry Smith } 19295fce210SBarry Smith 193cc4c1da9SBarry Smith /*@ 194cab54364SBarry Smith PetscSFGetType - Get the `PetscSF` communication implementation 19529046d53SLisandro Dalcin 19629046d53SLisandro Dalcin Not Collective 19729046d53SLisandro Dalcin 19829046d53SLisandro Dalcin Input Parameter: 199cab54364SBarry Smith . sf - the `PetscSF` context 20029046d53SLisandro Dalcin 20129046d53SLisandro Dalcin Output Parameter: 202cab54364SBarry Smith . type - the `PetscSF` type name 20329046d53SLisandro Dalcin 20429046d53SLisandro Dalcin Level: intermediate 20529046d53SLisandro Dalcin 20620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 20729046d53SLisandro Dalcin @*/ 208d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209d71ae5a4SJacob Faibussowitsch { 21029046d53SLisandro Dalcin PetscFunctionBegin; 21129046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2124f572ea9SToby Isaac PetscAssertPointer(type, 2); 21329046d53SLisandro Dalcin *type = ((PetscObject)sf)->type_name; 2143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21529046d53SLisandro Dalcin } 21629046d53SLisandro Dalcin 2170764c050SBarry Smith /*@ 21820662ed9SBarry Smith PetscSFDestroy - destroy a star forest 21995fce210SBarry Smith 22095fce210SBarry Smith Collective 22195fce210SBarry Smith 2224165533cSJose E. Roman Input Parameter: 22395fce210SBarry Smith . sf - address of star forest 22495fce210SBarry Smith 22595fce210SBarry Smith Level: intermediate 22695fce210SBarry Smith 22720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 22895fce210SBarry Smith @*/ 229d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf) 230d71ae5a4SJacob Faibussowitsch { 23195fce210SBarry Smith PetscFunctionBegin; 2323ba16761SJacob Faibussowitsch if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233f4f49eeaSPierre Jolivet PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234f4f49eeaSPierre Jolivet if (--((PetscObject)*sf)->refct > 0) { 2359371c9d4SSatish Balay *sf = NULL; 2363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2379371c9d4SSatish Balay } 2389566063dSJacob Faibussowitsch PetscCall(PetscSFReset(*sf)); 239f4f49eeaSPierre Jolivet PetscTryTypeMethod(*sf, Destroy); 2409566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 2419566063dSJacob Faibussowitsch if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243715b587bSJunchao Zhang if ((*sf)->use_stream_aware_mpi) { 244715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245715b587bSJunchao Zhang PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246715b587bSJunchao Zhang } 247715b587bSJunchao Zhang #endif 2489566063dSJacob Faibussowitsch PetscCall(PetscHeaderDestroy(sf)); 2493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25095fce210SBarry Smith } 25195fce210SBarry Smith 252d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253d71ae5a4SJacob Faibussowitsch { 254c4e6a40aSLawrence Mitchell PetscInt i, nleaves; 255c4e6a40aSLawrence Mitchell PetscMPIInt size; 256c4e6a40aSLawrence Mitchell const PetscInt *ilocal; 257c4e6a40aSLawrence Mitchell const PetscSFNode *iremote; 258c4e6a40aSLawrence Mitchell 259c4e6a40aSLawrence Mitchell PetscFunctionBegin; 2603ba16761SJacob Faibussowitsch if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 2619566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 2629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263c4e6a40aSLawrence Mitchell for (i = 0; i < nleaves; i++) { 264c4e6a40aSLawrence Mitchell const PetscInt rank = iremote[i].rank; 265c4e6a40aSLawrence Mitchell const PetscInt remote = iremote[i].index; 266c4e6a40aSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 267c9cc58a2SBarry Smith PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 26808401ef6SPierre Jolivet PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 26908401ef6SPierre Jolivet PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270c4e6a40aSLawrence Mitchell } 2713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 272c4e6a40aSLawrence Mitchell } 273c4e6a40aSLawrence Mitchell 27495fce210SBarry Smith /*@ 27520662ed9SBarry Smith PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 27695fce210SBarry Smith 27795fce210SBarry Smith Collective 27895fce210SBarry Smith 2794165533cSJose E. Roman Input Parameter: 28095fce210SBarry Smith . sf - star forest communication object 28195fce210SBarry Smith 28295fce210SBarry Smith Level: beginner 28395fce210SBarry Smith 28420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 28595fce210SBarry Smith @*/ 286d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf) 287d71ae5a4SJacob Faibussowitsch { 28895fce210SBarry Smith PetscFunctionBegin; 28929046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 29029046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 2913ba16761SJacob Faibussowitsch if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 2929566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 2939566063dSJacob Faibussowitsch PetscCall(PetscSFCheckGraphValid_Private(sf)); 2949566063dSJacob Faibussowitsch if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetUp); 29620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 29720c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_CUDA) { 29871438e86SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_CUDA; 29971438e86SJunchao Zhang sf->ops->Free = PetscSFFree_CUDA; 30020c24465SJunchao Zhang } 30120c24465SJunchao Zhang #endif 30259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 30359af0bd3SScott Kruger if (sf->backend == PETSCSF_BACKEND_HIP) { 30459af0bd3SScott Kruger sf->ops->Malloc = PetscSFMalloc_HIP; 30559af0bd3SScott Kruger sf->ops->Free = PetscSFFree_HIP; 30659af0bd3SScott Kruger } 30759af0bd3SScott Kruger #endif 30820c24465SJunchao Zhang 30920c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 31020c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 31120c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Kokkos; 31220c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Kokkos; 31320c24465SJunchao Zhang } 31420c24465SJunchao Zhang #endif 3159566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 31695fce210SBarry Smith sf->setupcalled = PETSC_TRUE; 3173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31895fce210SBarry Smith } 31995fce210SBarry Smith 3208af6ec1cSBarry Smith /*@ 321cab54364SBarry Smith PetscSFSetFromOptions - set `PetscSF` options using the options database 32295fce210SBarry Smith 32395fce210SBarry Smith Logically Collective 32495fce210SBarry Smith 3254165533cSJose E. Roman Input Parameter: 32695fce210SBarry Smith . sf - star forest 32795fce210SBarry Smith 32895fce210SBarry Smith Options Database Keys: 32920662ed9SBarry Smith + -sf_type - implementation type, see `PetscSFSetType()` 33051ccb202SJunchao Zhang . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 33120662ed9SBarry Smith . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 33220662ed9SBarry Smith use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 33320662ed9SBarry Smith If true, this option only works with `-use_gpu_aware_mpi 1`. 33420662ed9SBarry Smith . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 33520662ed9SBarry Smith If true, this option only works with `-use_gpu_aware_mpi 1`. 33695fce210SBarry Smith 3376497c311SBarry Smith - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 33859af0bd3SScott Kruger On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 33920c24465SJunchao Zhang the only available is kokkos. 34020c24465SJunchao Zhang 34195fce210SBarry Smith Level: intermediate 342cab54364SBarry Smith 343cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 34495fce210SBarry Smith @*/ 345d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346d71ae5a4SJacob Faibussowitsch { 34795fce210SBarry Smith PetscSFType deft; 34895fce210SBarry Smith char type[256]; 34995fce210SBarry Smith PetscBool flg; 35095fce210SBarry Smith 35195fce210SBarry Smith PetscFunctionBegin; 35295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353d0609cedSBarry Smith PetscObjectOptionsBegin((PetscObject)sf); 35495fce210SBarry Smith deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 3559566063dSJacob Faibussowitsch PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 3569566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, flg ? type : deft)); 3579566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 358*f9334340SJunchao Zhang PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL)); 3597fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 36020c24465SJunchao Zhang { 36120c24465SJunchao Zhang char backendstr[32] = {0}; 36259af0bd3SScott Kruger PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 36320c24465SJunchao Zhang /* Change the defaults set in PetscSFCreate() with command line options */ 364d5b43468SJose E. Roman PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 3659566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 3669566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 3679566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 3689566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 3699566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 37059af0bd3SScott Kruger #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 37120c24465SJunchao Zhang if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 37220c24465SJunchao Zhang else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 37359af0bd3SScott Kruger else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 37428b400f6SJacob Faibussowitsch else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 37520c24465SJunchao Zhang #elif defined(PETSC_HAVE_KOKKOS) 37608401ef6SPierre Jolivet PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 37720c24465SJunchao Zhang #endif 378715b587bSJunchao Zhang 379715b587bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 380715b587bSJunchao Zhang if (sf->use_stream_aware_mpi) { 381715b587bSJunchao Zhang MPI_Info info; 382715b587bSJunchao Zhang 383715b587bSJunchao Zhang PetscCallMPI(MPI_Info_create(&info)); 384715b587bSJunchao Zhang PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 385715b587bSJunchao Zhang PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 386715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 387715b587bSJunchao Zhang PetscCallMPI(MPI_Info_free(&info)); 388715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 389715b587bSJunchao Zhang } 390715b587bSJunchao Zhang #endif 39120c24465SJunchao Zhang } 392c2a741eeSJunchao Zhang #endif 393dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 394d0609cedSBarry Smith PetscOptionsEnd(); 3953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39695fce210SBarry Smith } 39795fce210SBarry Smith 39829046d53SLisandro Dalcin /*@ 39995fce210SBarry Smith PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 40095fce210SBarry Smith 40195fce210SBarry Smith Logically Collective 40295fce210SBarry Smith 4034165533cSJose E. Roman Input Parameters: 40495fce210SBarry Smith + sf - star forest 405cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 40695fce210SBarry Smith 40795fce210SBarry Smith Level: advanced 40895fce210SBarry Smith 40920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 41095fce210SBarry Smith @*/ 411d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 412d71ae5a4SJacob Faibussowitsch { 41395fce210SBarry Smith PetscFunctionBegin; 41495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 41595fce210SBarry Smith PetscValidLogicalCollectiveBool(sf, flg, 2); 41628b400f6SJacob Faibussowitsch PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 41795fce210SBarry Smith sf->rankorder = flg; 4183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41995fce210SBarry Smith } 42095fce210SBarry Smith 4215d83a8b1SBarry Smith /*@ 42295fce210SBarry Smith PetscSFSetGraph - Set a parallel star forest 42395fce210SBarry Smith 42495fce210SBarry Smith Collective 42595fce210SBarry Smith 4264165533cSJose E. Roman Input Parameters: 42795fce210SBarry Smith + sf - star forest 42895fce210SBarry Smith . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 42995fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 43020662ed9SBarry Smith . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 431c4e6a40aSLawrence Mitchell during setup in debug mode) 43220662ed9SBarry Smith . localmode - copy mode for `ilocal` 433c4e6a40aSLawrence Mitchell . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 434c4e6a40aSLawrence Mitchell during setup in debug mode) 43520662ed9SBarry Smith - remotemode - copy mode for `iremote` 43695fce210SBarry Smith 43795fce210SBarry Smith Level: intermediate 43895fce210SBarry Smith 43995452b02SPatrick Sanan Notes: 44020662ed9SBarry Smith Leaf indices in `ilocal` must be unique, otherwise an error occurs. 44138ab3f8aSBarry Smith 44220662ed9SBarry Smith Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 44320662ed9SBarry Smith In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 444db2b9530SVaclav Hapla PETSc might modify the respective array; 44520662ed9SBarry Smith if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 446cab54364SBarry Smith Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 447db2b9530SVaclav Hapla 44838b5cf2dSJacob Faibussowitsch Fortran Notes: 44920662ed9SBarry Smith In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 450c4e6a40aSLawrence Mitchell 45138b5cf2dSJacob Faibussowitsch Developer Notes: 452db2b9530SVaclav Hapla We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 45320662ed9SBarry Smith This also allows to compare leaf sets of two `PetscSF`s easily. 45472bf8598SVaclav Hapla 45520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 45695fce210SBarry Smith @*/ 457d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 458d71ae5a4SJacob Faibussowitsch { 459db2b9530SVaclav Hapla PetscBool unique, contiguous; 46095fce210SBarry Smith 46195fce210SBarry Smith PetscFunctionBegin; 46295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 4634f572ea9SToby Isaac if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 4644f572ea9SToby Isaac if (nleaves > 0) PetscAssertPointer(iremote, 6); 46508401ef6SPierre Jolivet PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 46608401ef6SPierre Jolivet PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 4678da24d32SBarry Smith /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 4688da24d32SBarry Smith * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 4698da24d32SBarry Smith PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 4708da24d32SBarry Smith PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 47129046d53SLisandro Dalcin 4722a67d2daSStefano Zampini if (sf->nroots >= 0) { /* Reset only if graph already set */ 4739566063dSJacob Faibussowitsch PetscCall(PetscSFReset(sf)); 4742a67d2daSStefano Zampini } 4752a67d2daSStefano Zampini 4769566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 4776497c311SBarry Smith if (PetscDefined(USE_DEBUG)) { 4786497c311SBarry Smith PetscMPIInt size; 4796497c311SBarry Smith 4806497c311SBarry Smith PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 4816497c311SBarry Smith for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 4826497c311SBarry Smith } 48329046d53SLisandro Dalcin 48495fce210SBarry Smith sf->nroots = nroots; 48595fce210SBarry Smith sf->nleaves = nleaves; 48629046d53SLisandro Dalcin 487db2b9530SVaclav Hapla if (localmode == PETSC_COPY_VALUES && ilocal) { 488db2b9530SVaclav Hapla PetscInt *tlocal = NULL; 489db2b9530SVaclav Hapla 4909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tlocal)); 4919566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 492db2b9530SVaclav Hapla ilocal = tlocal; 493db2b9530SVaclav Hapla } 494db2b9530SVaclav Hapla if (remotemode == PETSC_COPY_VALUES) { 495db2b9530SVaclav Hapla PetscSFNode *tremote = NULL; 496db2b9530SVaclav Hapla 4979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tremote)); 4989566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 499db2b9530SVaclav Hapla iremote = tremote; 500db2b9530SVaclav Hapla } 501db2b9530SVaclav Hapla 50229046d53SLisandro Dalcin if (nleaves && ilocal) { 503db2b9530SVaclav Hapla PetscSFNode work; 504db2b9530SVaclav Hapla 5059566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 5069566063dSJacob Faibussowitsch PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 507db2b9530SVaclav Hapla unique = PetscNot(unique); 508db2b9530SVaclav Hapla PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 509db2b9530SVaclav Hapla sf->minleaf = ilocal[0]; 510db2b9530SVaclav Hapla sf->maxleaf = ilocal[nleaves - 1]; 511db2b9530SVaclav Hapla contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 51229046d53SLisandro Dalcin } else { 51329046d53SLisandro Dalcin sf->minleaf = 0; 51429046d53SLisandro Dalcin sf->maxleaf = nleaves - 1; 515db2b9530SVaclav Hapla unique = PETSC_TRUE; 516db2b9530SVaclav Hapla contiguous = PETSC_TRUE; 51729046d53SLisandro Dalcin } 51829046d53SLisandro Dalcin 519db2b9530SVaclav Hapla if (contiguous) { 520db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) { 521db2b9530SVaclav Hapla ilocal = NULL; 522db2b9530SVaclav Hapla } else { 5239566063dSJacob Faibussowitsch PetscCall(PetscFree(ilocal)); 524db2b9530SVaclav Hapla } 525db2b9530SVaclav Hapla } 526db2b9530SVaclav Hapla sf->mine = ilocal; 527db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) { 52829046d53SLisandro Dalcin sf->mine_alloc = NULL; 529db2b9530SVaclav Hapla } else { 530db2b9530SVaclav Hapla sf->mine_alloc = ilocal; 53195fce210SBarry Smith } 5326497c311SBarry Smith if (PetscDefined(USE_DEBUG)) { 5336497c311SBarry Smith PetscMPIInt size; 5346497c311SBarry Smith 5356497c311SBarry Smith PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 5366497c311SBarry Smith for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 5376497c311SBarry Smith } 538db2b9530SVaclav Hapla sf->remote = iremote; 539db2b9530SVaclav Hapla if (remotemode == PETSC_USE_POINTER) { 54029046d53SLisandro Dalcin sf->remote_alloc = NULL; 541db2b9530SVaclav Hapla } else { 542db2b9530SVaclav Hapla sf->remote_alloc = iremote; 54395fce210SBarry Smith } 5449566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 54529046d53SLisandro Dalcin sf->graphset = PETSC_TRUE; 5463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 54795fce210SBarry Smith } 54895fce210SBarry Smith 54929046d53SLisandro Dalcin /*@ 550cab54364SBarry Smith PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 551dd5b3ca6SJunchao Zhang 552dd5b3ca6SJunchao Zhang Collective 553dd5b3ca6SJunchao Zhang 554dd5b3ca6SJunchao Zhang Input Parameters: 555cab54364SBarry Smith + sf - The `PetscSF` 556cab54364SBarry Smith . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 557cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 558cab54364SBarry Smith 559cab54364SBarry Smith Level: intermediate 560dd5b3ca6SJunchao Zhang 561dd5b3ca6SJunchao Zhang Notes: 56220662ed9SBarry Smith It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 56320662ed9SBarry Smith `n` and `N` are the local and global sizes of `x` respectively. 564dd5b3ca6SJunchao Zhang 56520662ed9SBarry Smith With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 56620662ed9SBarry Smith sequential vectors `y` on all MPI processes. 567dd5b3ca6SJunchao Zhang 56820662ed9SBarry Smith With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 56920662ed9SBarry Smith sequential vector `y` on rank 0. 570dd5b3ca6SJunchao Zhang 57120662ed9SBarry Smith In above cases, entries of `x` are roots and entries of `y` are leaves. 572dd5b3ca6SJunchao Zhang 57320662ed9SBarry Smith With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 574dd5b3ca6SJunchao Zhang creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 575cab54364SBarry Smith of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 576dd5b3ca6SJunchao Zhang not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 577cab54364SBarry Smith items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 578dd5b3ca6SJunchao Zhang 579dd5b3ca6SJunchao Zhang In this case, roots and leaves are symmetric. 580dd5b3ca6SJunchao Zhang 581cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 582dd5b3ca6SJunchao Zhang @*/ 583d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 584d71ae5a4SJacob Faibussowitsch { 585dd5b3ca6SJunchao Zhang MPI_Comm comm; 586dd5b3ca6SJunchao Zhang PetscInt n, N, res[2]; 587dd5b3ca6SJunchao Zhang PetscMPIInt rank, size; 588dd5b3ca6SJunchao Zhang PetscSFType type; 589dd5b3ca6SJunchao Zhang 590dd5b3ca6SJunchao Zhang PetscFunctionBegin; 5912abc8c78SJacob Faibussowitsch PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 5924f572ea9SToby Isaac if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 5939566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 5942c71b3e2SJacob Faibussowitsch PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 5959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 597dd5b3ca6SJunchao Zhang 598dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLTOALL) { 599dd5b3ca6SJunchao Zhang type = PETSCSFALLTOALL; 6009566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &sf->map)); 6019566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 60257508eceSPierre Jolivet PetscCall(PetscLayoutSetSize(sf->map, (PetscInt)size * size)); 6039566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(sf->map)); 604dd5b3ca6SJunchao Zhang } else { 6059566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetLocalSize(map, &n)); 6069566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetSize(map, &N)); 607dd5b3ca6SJunchao Zhang res[0] = n; 608dd5b3ca6SJunchao Zhang res[1] = -n; 609dd5b3ca6SJunchao Zhang /* Check if n are same over all ranks so that we can optimize it */ 610462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 611dd5b3ca6SJunchao Zhang if (res[0] == -res[1]) { /* same n */ 612dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 613dd5b3ca6SJunchao Zhang } else { 614dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 615dd5b3ca6SJunchao Zhang } 6169566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(map, &sf->map)); 617dd5b3ca6SJunchao Zhang } 6189566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, type)); 619dd5b3ca6SJunchao Zhang 620dd5b3ca6SJunchao Zhang sf->pattern = pattern; 621dd5b3ca6SJunchao Zhang sf->mine = NULL; /* Contiguous */ 622dd5b3ca6SJunchao Zhang 623dd5b3ca6SJunchao Zhang /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 624dd5b3ca6SJunchao Zhang Also set other easy stuff. 625dd5b3ca6SJunchao Zhang */ 626dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLGATHER) { 627dd5b3ca6SJunchao Zhang sf->nleaves = N; 628dd5b3ca6SJunchao Zhang sf->nroots = n; 629dd5b3ca6SJunchao Zhang sf->nranks = size; 630dd5b3ca6SJunchao Zhang sf->minleaf = 0; 631dd5b3ca6SJunchao Zhang sf->maxleaf = N - 1; 632dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_GATHER) { 633dd5b3ca6SJunchao Zhang sf->nleaves = rank ? 0 : N; 634dd5b3ca6SJunchao Zhang sf->nroots = n; 635dd5b3ca6SJunchao Zhang sf->nranks = rank ? 0 : size; 636dd5b3ca6SJunchao Zhang sf->minleaf = 0; 637dd5b3ca6SJunchao Zhang sf->maxleaf = rank ? -1 : N - 1; 638dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 639dd5b3ca6SJunchao Zhang sf->nleaves = size; 640dd5b3ca6SJunchao Zhang sf->nroots = size; 641dd5b3ca6SJunchao Zhang sf->nranks = size; 642dd5b3ca6SJunchao Zhang sf->minleaf = 0; 643dd5b3ca6SJunchao Zhang sf->maxleaf = size - 1; 644dd5b3ca6SJunchao Zhang } 645dd5b3ca6SJunchao Zhang sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 646dd5b3ca6SJunchao Zhang sf->graphset = PETSC_TRUE; 6473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 648dd5b3ca6SJunchao Zhang } 649dd5b3ca6SJunchao Zhang 650dd5b3ca6SJunchao Zhang /*@ 651cab54364SBarry Smith PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 65295fce210SBarry Smith 65395fce210SBarry Smith Collective 65495fce210SBarry Smith 6554165533cSJose E. Roman Input Parameter: 65695fce210SBarry Smith . sf - star forest to invert 65795fce210SBarry Smith 6584165533cSJose E. Roman Output Parameter: 65920662ed9SBarry Smith . isf - inverse of `sf` 6604165533cSJose E. Roman 66195fce210SBarry Smith Level: advanced 66295fce210SBarry Smith 66395fce210SBarry Smith Notes: 66495fce210SBarry Smith All roots must have degree 1. 66595fce210SBarry Smith 66695fce210SBarry Smith The local space may be a permutation, but cannot be sparse. 66795fce210SBarry Smith 66820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 66995fce210SBarry Smith @*/ 670d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 671d71ae5a4SJacob Faibussowitsch { 67295fce210SBarry Smith PetscMPIInt rank; 67395fce210SBarry Smith PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 67495fce210SBarry Smith const PetscInt *ilocal; 67595fce210SBarry Smith PetscSFNode *roots, *leaves; 67695fce210SBarry Smith 67795fce210SBarry Smith PetscFunctionBegin; 67829046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 67929046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 6804f572ea9SToby Isaac PetscAssertPointer(isf, 2); 68129046d53SLisandro Dalcin 6829566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 68329046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 68429046d53SLisandro Dalcin 6859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 6869566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 687ae9aee6dSMatthew G. Knepley for (i = 0; i < maxlocal; i++) { 68895fce210SBarry Smith leaves[i].rank = rank; 68995fce210SBarry Smith leaves[i].index = i; 69095fce210SBarry Smith } 69195fce210SBarry Smith for (i = 0; i < nroots; i++) { 69295fce210SBarry Smith roots[i].rank = -1; 69395fce210SBarry Smith roots[i].index = -1; 69495fce210SBarry Smith } 6956497c311SBarry Smith PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 6966497c311SBarry Smith PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 69795fce210SBarry Smith 69895fce210SBarry Smith /* Check whether our leaves are sparse */ 6999371c9d4SSatish Balay for (i = 0, count = 0; i < nroots; i++) 7009371c9d4SSatish Balay if (roots[i].rank >= 0) count++; 70195fce210SBarry Smith if (count == nroots) newilocal = NULL; 7029371c9d4SSatish Balay else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 70395fce210SBarry Smith for (i = 0, count = 0; i < nroots; i++) { 70495fce210SBarry Smith if (roots[i].rank >= 0) { 70595fce210SBarry Smith newilocal[count] = i; 70695fce210SBarry Smith roots[count].rank = roots[i].rank; 70795fce210SBarry Smith roots[count].index = roots[i].index; 70895fce210SBarry Smith count++; 70995fce210SBarry Smith } 71095fce210SBarry Smith } 71195fce210SBarry Smith } 71295fce210SBarry Smith 7139566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 7149566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 7159566063dSJacob Faibussowitsch PetscCall(PetscFree2(roots, leaves)); 7163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 71795fce210SBarry Smith } 71895fce210SBarry Smith 71995fce210SBarry Smith /*@ 720cab54364SBarry Smith PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 72195fce210SBarry Smith 72295fce210SBarry Smith Collective 72395fce210SBarry Smith 7244165533cSJose E. Roman Input Parameters: 72595fce210SBarry Smith + sf - communication object to duplicate 726cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 72795fce210SBarry Smith 7284165533cSJose E. Roman Output Parameter: 72995fce210SBarry Smith . newsf - new communication object 73095fce210SBarry Smith 73195fce210SBarry Smith Level: beginner 73295fce210SBarry Smith 73320662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 73495fce210SBarry Smith @*/ 735d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 736d71ae5a4SJacob Faibussowitsch { 73729046d53SLisandro Dalcin PetscSFType type; 73897929ea7SJunchao Zhang MPI_Datatype dtype = MPIU_SCALAR; 73995fce210SBarry Smith 74095fce210SBarry Smith PetscFunctionBegin; 74129046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 74229046d53SLisandro Dalcin PetscValidLogicalCollectiveEnum(sf, opt, 2); 7434f572ea9SToby Isaac PetscAssertPointer(newsf, 3); 7449566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 7459566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type)); 7469566063dSJacob Faibussowitsch if (type) PetscCall(PetscSFSetType(*newsf, type)); 74735cb6cd3SPierre Jolivet (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 74895fce210SBarry Smith if (opt == PETSCSF_DUPLICATE_GRAPH) { 749dd5b3ca6SJunchao Zhang PetscSFCheckGraphSet(sf, 1); 750dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 75195fce210SBarry Smith PetscInt nroots, nleaves; 75295fce210SBarry Smith const PetscInt *ilocal; 75395fce210SBarry Smith const PetscSFNode *iremote; 7549566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 7559566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 756dd5b3ca6SJunchao Zhang } else { 7579566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 758dd5b3ca6SJunchao Zhang } 75995fce210SBarry Smith } 76097929ea7SJunchao Zhang /* Since oldtype is committed, so is newtype, according to MPI */ 7619566063dSJacob Faibussowitsch if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 76297929ea7SJunchao Zhang (*newsf)->vscat.bs = sf->vscat.bs; 76397929ea7SJunchao Zhang (*newsf)->vscat.unit = dtype; 76497929ea7SJunchao Zhang (*newsf)->vscat.to_n = sf->vscat.to_n; 76597929ea7SJunchao Zhang (*newsf)->vscat.from_n = sf->vscat.from_n; 76697929ea7SJunchao Zhang /* Do not copy lsf. Build it on demand since it is rarely used */ 76797929ea7SJunchao Zhang 76820c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 76920c24465SJunchao Zhang (*newsf)->backend = sf->backend; 77071438e86SJunchao Zhang (*newsf)->unknown_input_stream = sf->unknown_input_stream; 77120c24465SJunchao Zhang (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 77220c24465SJunchao Zhang (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 77320c24465SJunchao Zhang #endif 774dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 77520c24465SJunchao Zhang /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 7763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 77795fce210SBarry Smith } 77895fce210SBarry Smith 77995fce210SBarry Smith /*@C 78095fce210SBarry Smith PetscSFGetGraph - Get the graph specifying a parallel star forest 78195fce210SBarry Smith 78295fce210SBarry Smith Not Collective 78395fce210SBarry Smith 7844165533cSJose E. Roman Input Parameter: 78595fce210SBarry Smith . sf - star forest 78695fce210SBarry Smith 7874165533cSJose E. Roman Output Parameters: 78895fce210SBarry Smith + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 78995fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 79020662ed9SBarry Smith . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 79195fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process 79295fce210SBarry Smith 793cab54364SBarry Smith Level: intermediate 794cab54364SBarry Smith 795373e0d91SLisandro Dalcin Notes: 79620662ed9SBarry Smith We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 797373e0d91SLisandro Dalcin 79820662ed9SBarry Smith The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 799db2b9530SVaclav Hapla 8008dbb0df6SBarry Smith Fortran Notes: 80120662ed9SBarry Smith The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 80220662ed9SBarry Smith want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 8038dbb0df6SBarry Smith 80420662ed9SBarry Smith To check for a `NULL` `ilocal` use 8058dbb0df6SBarry Smith $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 806ca797d7aSLawrence Mitchell 80720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 80895fce210SBarry Smith @*/ 809d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 810d71ae5a4SJacob Faibussowitsch { 81195fce210SBarry Smith PetscFunctionBegin; 81295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 813b8dee149SJunchao Zhang if (sf->ops->GetGraph) { 814f4f49eeaSPierre Jolivet PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 815b8dee149SJunchao Zhang } else { 81695fce210SBarry Smith if (nroots) *nroots = sf->nroots; 81795fce210SBarry Smith if (nleaves) *nleaves = sf->nleaves; 81895fce210SBarry Smith if (ilocal) *ilocal = sf->mine; 81995fce210SBarry Smith if (iremote) *iremote = sf->remote; 820b8dee149SJunchao Zhang } 8213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 82295fce210SBarry Smith } 82395fce210SBarry Smith 82429046d53SLisandro Dalcin /*@ 82595fce210SBarry Smith PetscSFGetLeafRange - Get the active leaf ranges 82695fce210SBarry Smith 82795fce210SBarry Smith Not Collective 82895fce210SBarry Smith 8294165533cSJose E. Roman Input Parameter: 83095fce210SBarry Smith . sf - star forest 83195fce210SBarry Smith 8324165533cSJose E. Roman Output Parameters: 83320662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 83420662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 83595fce210SBarry Smith 83695fce210SBarry Smith Level: developer 83795fce210SBarry Smith 83820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 83995fce210SBarry Smith @*/ 840d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 841d71ae5a4SJacob Faibussowitsch { 84295fce210SBarry Smith PetscFunctionBegin; 84395fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 84429046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 84595fce210SBarry Smith if (minleaf) *minleaf = sf->minleaf; 84695fce210SBarry Smith if (maxleaf) *maxleaf = sf->maxleaf; 8473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 84895fce210SBarry Smith } 84995fce210SBarry Smith 850ffeef943SBarry Smith /*@ 851cab54364SBarry Smith PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 852fe2efc57SMark 85320f4b53cSBarry Smith Collective 854fe2efc57SMark 855fe2efc57SMark Input Parameters: 856fe2efc57SMark + A - the star forest 857cab54364SBarry Smith . obj - Optional object that provides the prefix for the option names 858736c3998SJose E. Roman - name - command line option 859fe2efc57SMark 860fe2efc57SMark Level: intermediate 861cab54364SBarry Smith 86220662ed9SBarry Smith Note: 86320662ed9SBarry Smith See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 86420662ed9SBarry Smith 865db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 866fe2efc57SMark @*/ 867d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 868d71ae5a4SJacob Faibussowitsch { 869fe2efc57SMark PetscFunctionBegin; 870fe2efc57SMark PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 8719566063dSJacob Faibussowitsch PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 8723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 873fe2efc57SMark } 874fe2efc57SMark 875ffeef943SBarry Smith /*@ 87695fce210SBarry Smith PetscSFView - view a star forest 87795fce210SBarry Smith 87895fce210SBarry Smith Collective 87995fce210SBarry Smith 8804165533cSJose E. Roman Input Parameters: 88195fce210SBarry Smith + sf - star forest 882cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 88395fce210SBarry Smith 88495fce210SBarry Smith Level: beginner 88595fce210SBarry Smith 886cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 88795fce210SBarry Smith @*/ 888d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 889d71ae5a4SJacob Faibussowitsch { 89095fce210SBarry Smith PetscBool iascii; 89195fce210SBarry Smith PetscViewerFormat format; 89295fce210SBarry Smith 89395fce210SBarry Smith PetscFunctionBegin; 89495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 8959566063dSJacob Faibussowitsch if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 89695fce210SBarry Smith PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 89795fce210SBarry Smith PetscCheckSameComm(sf, 1, viewer, 2); 8989566063dSJacob Faibussowitsch if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 8999566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 90053dd6d7dSJunchao Zhang if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 90195fce210SBarry Smith PetscMPIInt rank; 9026497c311SBarry Smith PetscInt j; 90395fce210SBarry Smith 9049566063dSJacob Faibussowitsch PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 9059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushTab(viewer)); 906dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 90780153354SVaclav Hapla if (!sf->graphset) { 9089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 9099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 9103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 91180153354SVaclav Hapla } 9129566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 9139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 9146497c311SBarry Smith PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 9156497c311SBarry Smith for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%d,%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, (PetscMPIInt)sf->remote[i].rank, sf->remote[i].index)); 9169566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9179566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 91895fce210SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 91981bfa7aaSJed Brown PetscMPIInt *tmpranks, *perm; 9206497c311SBarry Smith 9219566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 9229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 9236497c311SBarry Smith for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i; 9249566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 9259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 9266497c311SBarry Smith for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) { 9276497c311SBarry Smith PetscMPIInt i = perm[ii]; 9286497c311SBarry Smith 9299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 93048a46eb9SPierre Jolivet for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 93195fce210SBarry Smith } 9329566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmpranks, perm)); 93395fce210SBarry Smith } 9349566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 936dd5b3ca6SJunchao Zhang } 9379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 93895fce210SBarry Smith } 939dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, View, viewer); 9403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 94195fce210SBarry Smith } 94295fce210SBarry Smith 94395fce210SBarry Smith /*@C 944dec1416fSJunchao Zhang PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 94595fce210SBarry Smith 94695fce210SBarry Smith Not Collective 94795fce210SBarry Smith 9484165533cSJose E. Roman Input Parameter: 94995fce210SBarry Smith . sf - star forest 95095fce210SBarry Smith 9514165533cSJose E. Roman Output Parameters: 95295fce210SBarry Smith + nranks - number of ranks referenced by local part 95320662ed9SBarry Smith . ranks - [`nranks`] array of ranks 95420662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 9556497c311SBarry Smith . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL` 9566497c311SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL` 95795fce210SBarry Smith 95895fce210SBarry Smith Level: developer 95995fce210SBarry Smith 960cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 96195fce210SBarry Smith @*/ 9626497c311SBarry Smith PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 963d71ae5a4SJacob Faibussowitsch { 96495fce210SBarry Smith PetscFunctionBegin; 96595fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 96628b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 967dec1416fSJunchao Zhang if (sf->ops->GetRootRanks) { 9689927e4dfSBarry Smith PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 969dec1416fSJunchao Zhang } else { 970dec1416fSJunchao Zhang /* The generic implementation */ 97195fce210SBarry Smith if (nranks) *nranks = sf->nranks; 97295fce210SBarry Smith if (ranks) *ranks = sf->ranks; 97395fce210SBarry Smith if (roffset) *roffset = sf->roffset; 97495fce210SBarry Smith if (rmine) *rmine = sf->rmine; 97595fce210SBarry Smith if (rremote) *rremote = sf->rremote; 976dec1416fSJunchao Zhang } 9773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 97895fce210SBarry Smith } 97995fce210SBarry Smith 9808750ddebSJunchao Zhang /*@C 9818750ddebSJunchao Zhang PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 9828750ddebSJunchao Zhang 9838750ddebSJunchao Zhang Not Collective 9848750ddebSJunchao Zhang 9854165533cSJose E. Roman Input Parameter: 9868750ddebSJunchao Zhang . sf - star forest 9878750ddebSJunchao Zhang 9884165533cSJose E. Roman Output Parameters: 9898750ddebSJunchao Zhang + niranks - number of leaf ranks referencing roots on this process 99020662ed9SBarry Smith . iranks - [`niranks`] array of ranks 99120662ed9SBarry Smith . ioffset - [`niranks`+1] offset in `irootloc` for each rank 99220662ed9SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 9938750ddebSJunchao Zhang 9948750ddebSJunchao Zhang Level: developer 9958750ddebSJunchao Zhang 996cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()` 9978750ddebSJunchao Zhang @*/ 9986497c311SBarry Smith PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 999d71ae5a4SJacob Faibussowitsch { 10008750ddebSJunchao Zhang PetscFunctionBegin; 10018750ddebSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 100228b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 10038750ddebSJunchao Zhang if (sf->ops->GetLeafRanks) { 10049927e4dfSBarry Smith PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 10058750ddebSJunchao Zhang } else { 10068750ddebSJunchao Zhang PetscSFType type; 10079566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type)); 100898921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 10098750ddebSJunchao Zhang } 10103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10118750ddebSJunchao Zhang } 10128750ddebSJunchao Zhang 1013d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1014d71ae5a4SJacob Faibussowitsch { 1015b5a8e515SJed Brown PetscInt i; 1016b5a8e515SJed Brown for (i = 0; i < n; i++) { 1017b5a8e515SJed Brown if (needle == list[i]) return PETSC_TRUE; 1018b5a8e515SJed Brown } 1019b5a8e515SJed Brown return PETSC_FALSE; 1020b5a8e515SJed Brown } 1021b5a8e515SJed Brown 102295fce210SBarry Smith /*@C 1023cab54364SBarry Smith PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 102421c688dcSJed Brown 102521c688dcSJed Brown Collective 102621c688dcSJed Brown 10274165533cSJose E. Roman Input Parameters: 1028cab54364SBarry Smith + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1029cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 103021c688dcSJed Brown 103121c688dcSJed Brown Level: developer 103221c688dcSJed Brown 1033cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()` 103421c688dcSJed Brown @*/ 1035d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1036d71ae5a4SJacob Faibussowitsch { 1037eec179cfSJacob Faibussowitsch PetscHMapI table; 1038eec179cfSJacob Faibussowitsch PetscHashIter pos; 10396497c311SBarry Smith PetscMPIInt size, groupsize, *groupranks, *ranks; 10406497c311SBarry Smith PetscInt *rcount; 10416497c311SBarry Smith PetscInt irank, sfnrank, ranksi; 10426497c311SBarry Smith PetscMPIInt i, orank = -1; 104321c688dcSJed Brown 104421c688dcSJed Brown PetscFunctionBegin; 104521c688dcSJed Brown PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 104629046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 10479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1048eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(10, &table)); 104921c688dcSJed Brown for (i = 0; i < sf->nleaves; i++) { 105021c688dcSJed Brown /* Log 1-based rank */ 1051eec179cfSJacob Faibussowitsch PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 105221c688dcSJed Brown } 10536497c311SBarry Smith PetscCall(PetscHMapIGetSize(table, &sfnrank)); 10546497c311SBarry Smith PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks)); 10559566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 10569566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1057eec179cfSJacob Faibussowitsch PetscHashIterBegin(table, pos); 105821c688dcSJed Brown for (i = 0; i < sf->nranks; i++) { 10596497c311SBarry Smith PetscHashIterGetKey(table, pos, ranksi); 10606497c311SBarry Smith PetscCall(PetscMPIIntCast(ranksi, &ranks[i])); 1061eec179cfSJacob Faibussowitsch PetscHashIterGetVal(table, pos, rcount[i]); 1062eec179cfSJacob Faibussowitsch PetscHashIterNext(table, pos); 106321c688dcSJed Brown ranks[i]--; /* Convert back to 0-based */ 106421c688dcSJed Brown } 1065eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&table)); 1066b5a8e515SJed Brown 1067b5a8e515SJed Brown /* We expect that dgroup is reliably "small" while nranks could be large */ 1068b5a8e515SJed Brown { 10697fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 1070b5a8e515SJed Brown PetscMPIInt *dgroupranks; 10716497c311SBarry Smith 10729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 10739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 10749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 10759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(groupsize, &groupranks)); 1076b5a8e515SJed Brown for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 10779566063dSJacob Faibussowitsch if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 10789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 10799566063dSJacob Faibussowitsch PetscCall(PetscFree(dgroupranks)); 1080b5a8e515SJed Brown } 1081b5a8e515SJed Brown 1082b5a8e515SJed Brown /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1083b5a8e515SJed Brown for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1084b5a8e515SJed Brown for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1085b5a8e515SJed Brown if (InList(ranks[i], groupsize, groupranks)) break; 1086b5a8e515SJed Brown } 1087b5a8e515SJed Brown for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1088b5a8e515SJed Brown if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1089b5a8e515SJed Brown } 1090b5a8e515SJed Brown if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 10916497c311SBarry Smith PetscMPIInt tmprank; 10926497c311SBarry Smith PetscInt tmpcount; 1093247e8311SStefano Zampini 1094b5a8e515SJed Brown tmprank = ranks[i]; 1095b5a8e515SJed Brown tmpcount = rcount[i]; 1096b5a8e515SJed Brown ranks[i] = ranks[sf->ndranks]; 1097b5a8e515SJed Brown rcount[i] = rcount[sf->ndranks]; 1098b5a8e515SJed Brown ranks[sf->ndranks] = tmprank; 1099b5a8e515SJed Brown rcount[sf->ndranks] = tmpcount; 1100b5a8e515SJed Brown sf->ndranks++; 1101b5a8e515SJed Brown } 1102b5a8e515SJed Brown } 11039566063dSJacob Faibussowitsch PetscCall(PetscFree(groupranks)); 11046497c311SBarry Smith PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount)); 11056497c311SBarry Smith if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 110621c688dcSJed Brown sf->roffset[0] = 0; 110721c688dcSJed Brown for (i = 0; i < sf->nranks; i++) { 11089566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 110921c688dcSJed Brown sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 111021c688dcSJed Brown rcount[i] = 0; 111121c688dcSJed Brown } 1112247e8311SStefano Zampini for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1113247e8311SStefano Zampini /* short circuit */ 1114247e8311SStefano Zampini if (orank != sf->remote[i].rank) { 111521c688dcSJed Brown /* Search for index of iremote[i].rank in sf->ranks */ 11166497c311SBarry Smith PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1117b5a8e515SJed Brown if (irank < 0) { 11186497c311SBarry Smith PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1119b5a8e515SJed Brown if (irank >= 0) irank += sf->ndranks; 112021c688dcSJed Brown } 11216497c311SBarry Smith orank = (PetscMPIInt)sf->remote[i].rank; 1122247e8311SStefano Zampini } 11236497c311SBarry Smith PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", (PetscMPIInt)sf->remote[i].rank); 112421c688dcSJed Brown sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 112521c688dcSJed Brown sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 112621c688dcSJed Brown rcount[irank]++; 112721c688dcSJed Brown } 11289566063dSJacob Faibussowitsch PetscCall(PetscFree2(rcount, ranks)); 11293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 113021c688dcSJed Brown } 113121c688dcSJed Brown 113221c688dcSJed Brown /*@C 113395fce210SBarry Smith PetscSFGetGroups - gets incoming and outgoing process groups 113495fce210SBarry Smith 113595fce210SBarry Smith Collective 113695fce210SBarry Smith 11374165533cSJose E. Roman Input Parameter: 113895fce210SBarry Smith . sf - star forest 113995fce210SBarry Smith 11404165533cSJose E. Roman Output Parameters: 114195fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots) 114295fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference) 114395fce210SBarry Smith 114495fce210SBarry Smith Level: developer 114595fce210SBarry Smith 1146cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 114795fce210SBarry Smith @*/ 1148d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1149d71ae5a4SJacob Faibussowitsch { 11507fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 115195fce210SBarry Smith 115295fce210SBarry Smith PetscFunctionBegin; 115308401ef6SPierre Jolivet PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 115495fce210SBarry Smith if (sf->ingroup == MPI_GROUP_NULL) { 115595fce210SBarry Smith PetscInt i; 115695fce210SBarry Smith const PetscInt *indegree; 11576497c311SBarry Smith PetscMPIInt rank, *outranks, *inranks, indegree0; 115895fce210SBarry Smith PetscSFNode *remote; 115995fce210SBarry Smith PetscSF bgcount; 116095fce210SBarry Smith 116195fce210SBarry Smith /* Compute the number of incoming ranks */ 11629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &remote)); 116395fce210SBarry Smith for (i = 0; i < sf->nranks; i++) { 116495fce210SBarry Smith remote[i].rank = sf->ranks[i]; 116595fce210SBarry Smith remote[i].index = 0; 116695fce210SBarry Smith } 11679566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 11689566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 11699566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 11709566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 117195fce210SBarry Smith /* Enumerate the incoming ranks */ 11729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 11739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 117495fce210SBarry Smith for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 11759566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 11769566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 11779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 11786497c311SBarry Smith PetscCall(PetscMPIIntCast(indegree[0], &indegree0)); 11796497c311SBarry Smith PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup)); 11809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 11819566063dSJacob Faibussowitsch PetscCall(PetscFree2(inranks, outranks)); 11829566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&bgcount)); 118395fce210SBarry Smith } 118495fce210SBarry Smith *incoming = sf->ingroup; 118595fce210SBarry Smith 118695fce210SBarry Smith if (sf->outgroup == MPI_GROUP_NULL) { 11879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 11889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 11899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 119095fce210SBarry Smith } 119195fce210SBarry Smith *outgoing = sf->outgroup; 11923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 119395fce210SBarry Smith } 119495fce210SBarry Smith 119529046d53SLisandro Dalcin /*@ 11960dd791a8SStefano Zampini PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 11970dd791a8SStefano Zampini 11980dd791a8SStefano Zampini Collective 11990dd791a8SStefano Zampini 12000dd791a8SStefano Zampini Input Parameter: 12010dd791a8SStefano Zampini . sf - star forest 12020dd791a8SStefano Zampini 12030dd791a8SStefano Zampini Output Parameter: 12040dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications 12050dd791a8SStefano Zampini 12060dd791a8SStefano Zampini Level: developer 12070dd791a8SStefano Zampini 12080dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 12090dd791a8SStefano Zampini @*/ 12100dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 12110dd791a8SStefano Zampini { 12120dd791a8SStefano Zampini PetscFunctionBegin; 12130dd791a8SStefano Zampini PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 12140dd791a8SStefano Zampini PetscAssertPointer(rsf, 2); 12150dd791a8SStefano Zampini if (!sf->rankssf) { 12160dd791a8SStefano Zampini PetscSFNode *rremotes; 12170dd791a8SStefano Zampini const PetscMPIInt *ranks; 12186497c311SBarry Smith PetscMPIInt nranks; 12190dd791a8SStefano Zampini 12200dd791a8SStefano Zampini PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 12210dd791a8SStefano Zampini PetscCall(PetscMalloc1(nranks, &rremotes)); 12220dd791a8SStefano Zampini for (PetscInt i = 0; i < nranks; i++) { 12230dd791a8SStefano Zampini rremotes[i].rank = ranks[i]; 12240dd791a8SStefano Zampini rremotes[i].index = 0; 12250dd791a8SStefano Zampini } 12260dd791a8SStefano Zampini PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 12270dd791a8SStefano Zampini PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 12280dd791a8SStefano Zampini } 12290dd791a8SStefano Zampini *rsf = sf->rankssf; 12300dd791a8SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 12310dd791a8SStefano Zampini } 12320dd791a8SStefano Zampini 12330dd791a8SStefano Zampini /*@ 1234cab54364SBarry Smith PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 123595fce210SBarry Smith 123695fce210SBarry Smith Collective 123795fce210SBarry Smith 12384165533cSJose E. Roman Input Parameter: 123995fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex 124095fce210SBarry Smith 12414165533cSJose E. Roman Output Parameter: 124295fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1 124395fce210SBarry Smith 124495fce210SBarry Smith Level: developer 124595fce210SBarry Smith 1246cab54364SBarry Smith Note: 1247cab54364SBarry Smith In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 124895fce210SBarry Smith directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 124995fce210SBarry Smith edge, it is a candidate for future optimization that might involve its removal. 125095fce210SBarry Smith 1251cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 125295fce210SBarry Smith @*/ 1253d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1254d71ae5a4SJacob Faibussowitsch { 125595fce210SBarry Smith PetscFunctionBegin; 125695fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 12574f572ea9SToby Isaac PetscAssertPointer(multi, 2); 125895fce210SBarry Smith if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 12599566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 126095fce210SBarry Smith *multi = sf->multi; 1261013b3241SStefano Zampini sf->multi->multi = sf->multi; 12623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 126395fce210SBarry Smith } 126495fce210SBarry Smith if (!sf->multi) { 126595fce210SBarry Smith const PetscInt *indegree; 12669837ea96SMatthew G. Knepley PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 126795fce210SBarry Smith PetscSFNode *remote; 126829046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 12699566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 12709566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 12719566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 127295fce210SBarry Smith inoffset[0] = 0; 127395fce210SBarry Smith for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 12749837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) outones[i] = 1; 12759566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 12769566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 127795fce210SBarry Smith for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 127876bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1279ad540459SPierre Jolivet for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 128076bd3646SJed Brown } 12819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &remote)); 128295fce210SBarry Smith for (i = 0; i < sf->nleaves; i++) { 128395fce210SBarry Smith remote[i].rank = sf->remote[i].rank; 128438e7336fSToby Isaac remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 128595fce210SBarry Smith } 12869566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1287013b3241SStefano Zampini sf->multi->multi = sf->multi; 12889566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 128995fce210SBarry Smith if (sf->rankorder) { /* Sort the ranks */ 129095fce210SBarry Smith PetscMPIInt rank; 129195fce210SBarry Smith PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 129295fce210SBarry Smith PetscSFNode *newremote; 12939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 129495fce210SBarry Smith for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 12959566063dSJacob Faibussowitsch PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 12969837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) outranks[i] = rank; 12979566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 12989566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 129995fce210SBarry Smith /* Sort the incoming ranks at each vertex, build the inverse map */ 130095fce210SBarry Smith for (i = 0; i < sf->nroots; i++) { 130195fce210SBarry Smith PetscInt j; 130295fce210SBarry Smith for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 13038e3a54c0SPierre Jolivet PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 130495fce210SBarry Smith for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 130595fce210SBarry Smith } 13069566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 13079566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 13089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 130995fce210SBarry Smith for (i = 0; i < sf->nleaves; i++) { 131095fce210SBarry Smith newremote[i].rank = sf->remote[i].rank; 131101365b40SToby Isaac newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 131295fce210SBarry Smith } 13139566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 13149566063dSJacob Faibussowitsch PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 131595fce210SBarry Smith } 13169566063dSJacob Faibussowitsch PetscCall(PetscFree3(inoffset, outones, outoffset)); 131795fce210SBarry Smith } 131895fce210SBarry Smith *multi = sf->multi; 13193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 132095fce210SBarry Smith } 132195fce210SBarry Smith 132295fce210SBarry Smith /*@C 132320662ed9SBarry Smith PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 132495fce210SBarry Smith 132595fce210SBarry Smith Collective 132695fce210SBarry Smith 13274165533cSJose E. Roman Input Parameters: 132895fce210SBarry Smith + sf - original star forest 1329ba2a7774SJunchao Zhang . nselected - number of selected roots on this process 1330ba2a7774SJunchao Zhang - selected - indices of the selected roots on this process 133195fce210SBarry Smith 13324165533cSJose E. Roman Output Parameter: 1333cd620004SJunchao Zhang . esf - new star forest 133495fce210SBarry Smith 133595fce210SBarry Smith Level: advanced 133695fce210SBarry Smith 133795fce210SBarry Smith Note: 1338cab54364SBarry Smith To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 133995fce210SBarry Smith be done by calling PetscSFGetGraph(). 134095fce210SBarry Smith 1341cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 134295fce210SBarry Smith @*/ 1343d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1344d71ae5a4SJacob Faibussowitsch { 1345cd620004SJunchao Zhang PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1346cd620004SJunchao Zhang const PetscInt *ilocal; 1347cd620004SJunchao Zhang signed char *rootdata, *leafdata, *leafmem; 1348ba2a7774SJunchao Zhang const PetscSFNode *iremote; 1349f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1350f659e5c7SJunchao Zhang MPI_Comm comm; 135195fce210SBarry Smith 135295fce210SBarry Smith PetscFunctionBegin; 135395fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 135429046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 13554f572ea9SToby Isaac if (nselected) PetscAssertPointer(selected, 3); 13564f572ea9SToby Isaac PetscAssertPointer(esf, 4); 13570511a646SMatthew G. Knepley 13589566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 13599566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 13609566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 13619566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1362cd620004SJunchao Zhang 136376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1364cd620004SJunchao Zhang PetscBool dups; 13659566063dSJacob Faibussowitsch PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 136628b400f6SJacob Faibussowitsch PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1367511e6246SStefano Zampini for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1368cd620004SJunchao Zhang } 1369f659e5c7SJunchao Zhang 1370dbbe0bcdSBarry Smith if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1371dbbe0bcdSBarry Smith else { 1372cd620004SJunchao Zhang /* A generic version of creating embedded sf */ 13739566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1374cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 13759566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 13768e3a54c0SPierre Jolivet leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1377cd620004SJunchao Zhang /* Tag selected roots and bcast to leaves */ 1378cd620004SJunchao Zhang for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 13799566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 13809566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1381ba2a7774SJunchao Zhang 1382cd620004SJunchao Zhang /* Build esf with leaves that are still connected */ 1383cd620004SJunchao Zhang esf_nleaves = 0; 1384cd620004SJunchao Zhang for (i = 0; i < nleaves; i++) { 1385cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1386cd620004SJunchao Zhang /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1387cd620004SJunchao Zhang with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1388cd620004SJunchao Zhang */ 1389cd620004SJunchao Zhang esf_nleaves += (leafdata[j] ? 1 : 0); 1390cd620004SJunchao Zhang } 13919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 13929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1393cd620004SJunchao Zhang for (i = n = 0; i < nleaves; i++) { 1394cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1395cd620004SJunchao Zhang if (leafdata[j]) { 1396cd620004SJunchao Zhang new_ilocal[n] = j; 1397cd620004SJunchao Zhang new_iremote[n].rank = iremote[i].rank; 1398cd620004SJunchao Zhang new_iremote[n].index = iremote[i].index; 1399fc1ede2bSMatthew G. Knepley ++n; 140095fce210SBarry Smith } 140195fce210SBarry Smith } 14029566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, esf)); 14039566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*esf)); 14049566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 14059566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem)); 1406f659e5c7SJunchao Zhang } 14079566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 14083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 140995fce210SBarry Smith } 141095fce210SBarry Smith 14112f5fb4c2SMatthew G. Knepley /*@C 141220662ed9SBarry Smith PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 14132f5fb4c2SMatthew G. Knepley 14142f5fb4c2SMatthew G. Knepley Collective 14152f5fb4c2SMatthew G. Knepley 14164165533cSJose E. Roman Input Parameters: 14172f5fb4c2SMatthew G. Knepley + sf - original star forest 1418f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process 1419f659e5c7SJunchao Zhang - selected - indices of the selected leaves on this process 14202f5fb4c2SMatthew G. Knepley 14214165533cSJose E. Roman Output Parameter: 14222f5fb4c2SMatthew G. Knepley . newsf - new star forest 14232f5fb4c2SMatthew G. Knepley 14242f5fb4c2SMatthew G. Knepley Level: advanced 14252f5fb4c2SMatthew G. Knepley 1426cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 14272f5fb4c2SMatthew G. Knepley @*/ 1428d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1429d71ae5a4SJacob Faibussowitsch { 1430f659e5c7SJunchao Zhang const PetscSFNode *iremote; 1431f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1432f659e5c7SJunchao Zhang const PetscInt *ilocal; 1433f659e5c7SJunchao Zhang PetscInt i, nroots, *leaves, *new_ilocal; 1434f659e5c7SJunchao Zhang MPI_Comm comm; 14352f5fb4c2SMatthew G. Knepley 14362f5fb4c2SMatthew G. Knepley PetscFunctionBegin; 14372f5fb4c2SMatthew G. Knepley PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 143829046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 14394f572ea9SToby Isaac if (nselected) PetscAssertPointer(selected, 3); 14404f572ea9SToby Isaac PetscAssertPointer(newsf, 4); 14412f5fb4c2SMatthew G. Knepley 1442f659e5c7SJunchao Zhang /* Uniq selected[] and put results in leaves[] */ 14439566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 14449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &leaves)); 14459566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(leaves, selected, nselected)); 14469566063dSJacob Faibussowitsch PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 144708401ef6SPierre Jolivet PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1448f659e5c7SJunchao Zhang 1449f659e5c7SJunchao Zhang /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1450dbbe0bcdSBarry Smith if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1451dbbe0bcdSBarry Smith else { 14529566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 14539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_ilocal)); 14549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_iremote)); 1455f659e5c7SJunchao Zhang for (i = 0; i < nselected; ++i) { 1456f659e5c7SJunchao Zhang const PetscInt l = leaves[i]; 1457f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 1458f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 1459f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 14602f5fb4c2SMatthew G. Knepley } 14619566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 14629566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1463f659e5c7SJunchao Zhang } 14649566063dSJacob Faibussowitsch PetscCall(PetscFree(leaves)); 14653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14662f5fb4c2SMatthew G. Knepley } 14672f5fb4c2SMatthew G. Knepley 146895fce210SBarry Smith /*@C 1469cab54364SBarry Smith PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 14703482bfa8SJunchao Zhang 1471c3339decSBarry Smith Collective 14723482bfa8SJunchao Zhang 14734165533cSJose E. Roman Input Parameters: 14743482bfa8SJunchao Zhang + sf - star forest on which to communicate 14753482bfa8SJunchao Zhang . unit - data type associated with each node 14763482bfa8SJunchao Zhang . rootdata - buffer to broadcast 14773482bfa8SJunchao Zhang - op - operation to use for reduction 14783482bfa8SJunchao Zhang 14794165533cSJose E. Roman Output Parameter: 14803482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 14813482bfa8SJunchao Zhang 14823482bfa8SJunchao Zhang Level: intermediate 14833482bfa8SJunchao Zhang 148420662ed9SBarry Smith Note: 148520662ed9SBarry Smith When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1486da81f932SPierre Jolivet are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1487cab54364SBarry Smith use `PetscSFBcastWithMemTypeBegin()` instead. 1488cab54364SBarry Smith 1489cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 14903482bfa8SJunchao Zhang @*/ 1491d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1492d71ae5a4SJacob Faibussowitsch { 1493eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 14943482bfa8SJunchao Zhang 14953482bfa8SJunchao Zhang PetscFunctionBegin; 14963482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 14979566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 14989566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 14999566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 15009566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1501dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 15029566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 15033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15043482bfa8SJunchao Zhang } 15053482bfa8SJunchao Zhang 15063482bfa8SJunchao Zhang /*@C 150720662ed9SBarry Smith PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 150820662ed9SBarry Smith to `PetscSFBcastEnd()` 1509d0295fc0SJunchao Zhang 1510c3339decSBarry Smith Collective 1511d0295fc0SJunchao Zhang 15124165533cSJose E. Roman Input Parameters: 1513d0295fc0SJunchao Zhang + sf - star forest on which to communicate 1514d0295fc0SJunchao Zhang . unit - data type associated with each node 1515d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1516d0295fc0SJunchao Zhang . rootdata - buffer to broadcast 1517d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1518d0295fc0SJunchao Zhang - op - operation to use for reduction 1519d0295fc0SJunchao Zhang 15204165533cSJose E. Roman Output Parameter: 1521d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 1522d0295fc0SJunchao Zhang 1523d0295fc0SJunchao Zhang Level: intermediate 1524d0295fc0SJunchao Zhang 1525cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1526d0295fc0SJunchao Zhang @*/ 1527d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1528d71ae5a4SJacob Faibussowitsch { 1529d0295fc0SJunchao Zhang PetscFunctionBegin; 1530d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15319566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 15329566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1533dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 15349566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 15353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1536d0295fc0SJunchao Zhang } 1537d0295fc0SJunchao Zhang 1538d0295fc0SJunchao Zhang /*@C 153920662ed9SBarry Smith PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 15403482bfa8SJunchao Zhang 15413482bfa8SJunchao Zhang Collective 15423482bfa8SJunchao Zhang 15434165533cSJose E. Roman Input Parameters: 15443482bfa8SJunchao Zhang + sf - star forest 15453482bfa8SJunchao Zhang . unit - data type 15463482bfa8SJunchao Zhang . rootdata - buffer to broadcast 15473482bfa8SJunchao Zhang - op - operation to use for reduction 15483482bfa8SJunchao Zhang 15494165533cSJose E. Roman Output Parameter: 15503482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 15513482bfa8SJunchao Zhang 15523482bfa8SJunchao Zhang Level: intermediate 15533482bfa8SJunchao Zhang 1554cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 15553482bfa8SJunchao Zhang @*/ 1556d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1557d71ae5a4SJacob Faibussowitsch { 15583482bfa8SJunchao Zhang PetscFunctionBegin; 15593482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15609566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1561dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 15629566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 15633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15643482bfa8SJunchao Zhang } 15653482bfa8SJunchao Zhang 15663482bfa8SJunchao Zhang /*@C 1567cab54364SBarry Smith PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 156895fce210SBarry Smith 156995fce210SBarry Smith Collective 157095fce210SBarry Smith 15714165533cSJose E. Roman Input Parameters: 157295fce210SBarry Smith + sf - star forest 157395fce210SBarry Smith . unit - data type 157495fce210SBarry Smith . leafdata - values to reduce 157595fce210SBarry Smith - op - reduction operation 157695fce210SBarry Smith 15774165533cSJose E. Roman Output Parameter: 157895fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 157995fce210SBarry Smith 158095fce210SBarry Smith Level: intermediate 158195fce210SBarry Smith 158220662ed9SBarry Smith Note: 158320662ed9SBarry Smith When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1584da81f932SPierre Jolivet are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1585cab54364SBarry Smith use `PetscSFReduceWithMemTypeBegin()` instead. 1586d0295fc0SJunchao Zhang 158720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 158895fce210SBarry Smith @*/ 1589d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1590d71ae5a4SJacob Faibussowitsch { 1591eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 159295fce210SBarry Smith 159395fce210SBarry Smith PetscFunctionBegin; 159495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15959566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 15969566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 15979566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 15989566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1599f4f49eeaSPierre Jolivet PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 16009566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 16013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 160295fce210SBarry Smith } 160395fce210SBarry Smith 160495fce210SBarry Smith /*@C 1605cab54364SBarry Smith PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1606d0295fc0SJunchao Zhang 1607d0295fc0SJunchao Zhang Collective 1608d0295fc0SJunchao Zhang 16094165533cSJose E. Roman Input Parameters: 1610d0295fc0SJunchao Zhang + sf - star forest 1611d0295fc0SJunchao Zhang . unit - data type 1612d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1613d0295fc0SJunchao Zhang . leafdata - values to reduce 1614d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1615d0295fc0SJunchao Zhang - op - reduction operation 1616d0295fc0SJunchao Zhang 16174165533cSJose E. Roman Output Parameter: 1618d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root 1619d0295fc0SJunchao Zhang 1620d0295fc0SJunchao Zhang Level: intermediate 1621d0295fc0SJunchao Zhang 162220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1623d0295fc0SJunchao Zhang @*/ 1624d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1625d71ae5a4SJacob Faibussowitsch { 1626d0295fc0SJunchao Zhang PetscFunctionBegin; 1627d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16289566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 16299566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1630f4f49eeaSPierre Jolivet PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 16319566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 16323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1633d0295fc0SJunchao Zhang } 1634d0295fc0SJunchao Zhang 1635d0295fc0SJunchao Zhang /*@C 163620662ed9SBarry Smith PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 163795fce210SBarry Smith 163895fce210SBarry Smith Collective 163995fce210SBarry Smith 16404165533cSJose E. Roman Input Parameters: 164195fce210SBarry Smith + sf - star forest 164295fce210SBarry Smith . unit - data type 164395fce210SBarry Smith . leafdata - values to reduce 164495fce210SBarry Smith - op - reduction operation 164595fce210SBarry Smith 16464165533cSJose E. Roman Output Parameter: 164795fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 164895fce210SBarry Smith 164995fce210SBarry Smith Level: intermediate 165095fce210SBarry Smith 165120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 165295fce210SBarry Smith @*/ 1653d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1654d71ae5a4SJacob Faibussowitsch { 165595fce210SBarry Smith PetscFunctionBegin; 165695fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16579566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1658dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 16599566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 16603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 166195fce210SBarry Smith } 166295fce210SBarry Smith 166395fce210SBarry Smith /*@C 1664cab54364SBarry Smith PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1665cab54364SBarry Smith to be completed with `PetscSFFetchAndOpEnd()` 1666a1729e3fSJunchao Zhang 1667a1729e3fSJunchao Zhang Collective 1668a1729e3fSJunchao Zhang 16694165533cSJose E. Roman Input Parameters: 1670a1729e3fSJunchao Zhang + sf - star forest 1671a1729e3fSJunchao Zhang . unit - data type 1672a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1673a1729e3fSJunchao Zhang - op - operation to use for reduction 1674a1729e3fSJunchao Zhang 16754165533cSJose E. Roman Output Parameters: 1676a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1677a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1678a1729e3fSJunchao Zhang 1679a1729e3fSJunchao Zhang Level: advanced 1680a1729e3fSJunchao Zhang 1681a1729e3fSJunchao Zhang Note: 1682a1729e3fSJunchao Zhang The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1683a1729e3fSJunchao Zhang might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1684a1729e3fSJunchao Zhang not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1685a1729e3fSJunchao Zhang integers. 1686a1729e3fSJunchao Zhang 1687cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1688a1729e3fSJunchao Zhang @*/ 1689d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1690d71ae5a4SJacob Faibussowitsch { 1691eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype, leafupdatemtype; 1692a1729e3fSJunchao Zhang 1693a1729e3fSJunchao Zhang PetscFunctionBegin; 1694a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16959566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 16969566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 16979566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 16989566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 16999566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 170008401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1701dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 17029566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 17033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1704a1729e3fSJunchao Zhang } 1705a1729e3fSJunchao Zhang 1706a1729e3fSJunchao Zhang /*@C 1707cab54364SBarry Smith PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1708cab54364SBarry Smith applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1709d3b3e55cSJunchao Zhang 1710d3b3e55cSJunchao Zhang Collective 1711d3b3e55cSJunchao Zhang 1712d3b3e55cSJunchao Zhang Input Parameters: 1713d3b3e55cSJunchao Zhang + sf - star forest 1714d3b3e55cSJunchao Zhang . unit - data type 1715d3b3e55cSJunchao Zhang . rootmtype - memory type of rootdata 1716d3b3e55cSJunchao Zhang . leafmtype - memory type of leafdata 1717d3b3e55cSJunchao Zhang . leafdata - leaf values to use in reduction 1718d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate 1719d3b3e55cSJunchao Zhang - op - operation to use for reduction 1720d3b3e55cSJunchao Zhang 1721d3b3e55cSJunchao Zhang Output Parameters: 1722d3b3e55cSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1723d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1724d3b3e55cSJunchao Zhang 1725d3b3e55cSJunchao Zhang Level: advanced 1726d3b3e55cSJunchao Zhang 1727cab54364SBarry Smith Note: 1728cab54364SBarry Smith See `PetscSFFetchAndOpBegin()` for more details. 1729d3b3e55cSJunchao Zhang 173020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1731d3b3e55cSJunchao Zhang @*/ 1732d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1733d71ae5a4SJacob Faibussowitsch { 1734d3b3e55cSJunchao Zhang PetscFunctionBegin; 1735d3b3e55cSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 17369566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 17379566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 173808401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1739dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 17409566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 17413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1742d3b3e55cSJunchao Zhang } 1743d3b3e55cSJunchao Zhang 1744d3b3e55cSJunchao Zhang /*@C 174520662ed9SBarry Smith PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 174620662ed9SBarry Smith to fetch values from roots and update atomically by applying operation using my leaf value 1747a1729e3fSJunchao Zhang 1748a1729e3fSJunchao Zhang Collective 1749a1729e3fSJunchao Zhang 17504165533cSJose E. Roman Input Parameters: 1751a1729e3fSJunchao Zhang + sf - star forest 1752a1729e3fSJunchao Zhang . unit - data type 1753a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1754a1729e3fSJunchao Zhang - op - operation to use for reduction 1755a1729e3fSJunchao Zhang 17564165533cSJose E. Roman Output Parameters: 1757a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1758a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1759a1729e3fSJunchao Zhang 1760a1729e3fSJunchao Zhang Level: advanced 1761a1729e3fSJunchao Zhang 176220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1763a1729e3fSJunchao Zhang @*/ 1764d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1765d71ae5a4SJacob Faibussowitsch { 1766a1729e3fSJunchao Zhang PetscFunctionBegin; 1767a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 17689566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1769dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 17709566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 17713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1772a1729e3fSJunchao Zhang } 1773a1729e3fSJunchao Zhang 1774a1729e3fSJunchao Zhang /*@C 1775cab54364SBarry Smith PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 177695fce210SBarry Smith 177795fce210SBarry Smith Collective 177895fce210SBarry Smith 17794165533cSJose E. Roman Input Parameter: 178095fce210SBarry Smith . sf - star forest 178195fce210SBarry Smith 17824165533cSJose E. Roman Output Parameter: 178395fce210SBarry Smith . degree - degree of each root vertex 178495fce210SBarry Smith 178595fce210SBarry Smith Level: advanced 178695fce210SBarry Smith 1787cab54364SBarry Smith Note: 178820662ed9SBarry Smith The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1789ffe67aa5SVáclav Hapla 1790cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 179195fce210SBarry Smith @*/ 17926497c311SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1793d71ae5a4SJacob Faibussowitsch { 179495fce210SBarry Smith PetscFunctionBegin; 179595fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 179695fce210SBarry Smith PetscSFCheckGraphSet(sf, 1); 17974f572ea9SToby Isaac PetscAssertPointer(degree, 2); 1798803bd9e8SMatthew G. Knepley if (!sf->degreeknown) { 17995b0d146aSStefano Zampini PetscInt i, nroots = sf->nroots, maxlocal; 180028b400f6SJacob Faibussowitsch PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 18015b0d146aSStefano Zampini maxlocal = sf->maxleaf - sf->minleaf + 1; 18029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &sf->degree)); 18039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 180429046d53SLisandro Dalcin for (i = 0; i < nroots; i++) sf->degree[i] = 0; 18059837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 18069566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 180795fce210SBarry Smith } 180895fce210SBarry Smith *degree = NULL; 18093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 181095fce210SBarry Smith } 181195fce210SBarry Smith 181295fce210SBarry Smith /*@C 1813cab54364SBarry Smith PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 181495fce210SBarry Smith 181595fce210SBarry Smith Collective 181695fce210SBarry Smith 18174165533cSJose E. Roman Input Parameter: 181895fce210SBarry Smith . sf - star forest 181995fce210SBarry Smith 18204165533cSJose E. Roman Output Parameter: 182195fce210SBarry Smith . degree - degree of each root vertex 182295fce210SBarry Smith 182395fce210SBarry Smith Level: developer 182495fce210SBarry Smith 1825cab54364SBarry Smith Note: 182620662ed9SBarry Smith The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1827ffe67aa5SVáclav Hapla 1828cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 182995fce210SBarry Smith @*/ 1830d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1831d71ae5a4SJacob Faibussowitsch { 183295fce210SBarry Smith PetscFunctionBegin; 183395fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 183495fce210SBarry Smith PetscSFCheckGraphSet(sf, 1); 18354f572ea9SToby Isaac PetscAssertPointer(degree, 2); 183695fce210SBarry Smith if (!sf->degreeknown) { 183728b400f6SJacob Faibussowitsch PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 18389566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 18399566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degreetmp)); 184095fce210SBarry Smith sf->degreeknown = PETSC_TRUE; 184195fce210SBarry Smith } 184295fce210SBarry Smith *degree = sf->degree; 18433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 184495fce210SBarry Smith } 184595fce210SBarry Smith 1846673100f5SVaclav Hapla /*@C 184720662ed9SBarry Smith PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 184866dfcd1aSVaclav Hapla Each multi-root is assigned index of the corresponding original root. 1849673100f5SVaclav Hapla 1850673100f5SVaclav Hapla Collective 1851673100f5SVaclav Hapla 18524165533cSJose E. Roman Input Parameters: 1853673100f5SVaclav Hapla + sf - star forest 1854cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1855673100f5SVaclav Hapla 18564165533cSJose E. Roman Output Parameters: 185720662ed9SBarry Smith + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 185820662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1859673100f5SVaclav Hapla 1860673100f5SVaclav Hapla Level: developer 1861673100f5SVaclav Hapla 1862cab54364SBarry Smith Note: 186320662ed9SBarry Smith The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1864ffe67aa5SVáclav Hapla 1865cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1866673100f5SVaclav Hapla @*/ 1867d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1868d71ae5a4SJacob Faibussowitsch { 1869673100f5SVaclav Hapla PetscSF msf; 187063bfac88SBarry Smith PetscInt k = 0, nroots, nmroots; 1871673100f5SVaclav Hapla 1872673100f5SVaclav Hapla PetscFunctionBegin; 1873673100f5SVaclav Hapla PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 18749566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 18754f572ea9SToby Isaac if (nroots) PetscAssertPointer(degree, 2); 18764f572ea9SToby Isaac if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 18774f572ea9SToby Isaac PetscAssertPointer(multiRootsOrigNumbering, 4); 18789566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &msf)); 18799566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 18809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 188163bfac88SBarry Smith for (PetscInt i = 0; i < nroots; i++) { 1882673100f5SVaclav Hapla if (!degree[i]) continue; 188363bfac88SBarry Smith for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1884673100f5SVaclav Hapla } 188508401ef6SPierre Jolivet PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 188666dfcd1aSVaclav Hapla if (nMultiRoots) *nMultiRoots = nmroots; 18873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1888673100f5SVaclav Hapla } 1889673100f5SVaclav Hapla 189095fce210SBarry Smith /*@C 1891cab54364SBarry Smith PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 189295fce210SBarry Smith 189395fce210SBarry Smith Collective 189495fce210SBarry Smith 18954165533cSJose E. Roman Input Parameters: 189695fce210SBarry Smith + sf - star forest 189795fce210SBarry Smith . unit - data type 189895fce210SBarry Smith - leafdata - leaf data to gather to roots 189995fce210SBarry Smith 19004165533cSJose E. Roman Output Parameter: 190195fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 190295fce210SBarry Smith 190395fce210SBarry Smith Level: intermediate 190495fce210SBarry Smith 1905cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 190695fce210SBarry Smith @*/ 1907d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1908d71ae5a4SJacob Faibussowitsch { 1909a5526d50SJunchao Zhang PetscSF multi = NULL; 191095fce210SBarry Smith 191195fce210SBarry Smith PetscFunctionBegin; 191295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19139566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 19149566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 19159566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 19163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 191795fce210SBarry Smith } 191895fce210SBarry Smith 191995fce210SBarry Smith /*@C 1920cab54364SBarry Smith PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 192195fce210SBarry Smith 192295fce210SBarry Smith Collective 192395fce210SBarry Smith 19244165533cSJose E. Roman Input Parameters: 192595fce210SBarry Smith + sf - star forest 192695fce210SBarry Smith . unit - data type 192795fce210SBarry Smith - leafdata - leaf data to gather to roots 192895fce210SBarry Smith 19294165533cSJose E. Roman Output Parameter: 193095fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 193195fce210SBarry Smith 193295fce210SBarry Smith Level: intermediate 193395fce210SBarry Smith 1934cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 193595fce210SBarry Smith @*/ 1936d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1937d71ae5a4SJacob Faibussowitsch { 1938a5526d50SJunchao Zhang PetscSF multi = NULL; 193995fce210SBarry Smith 194095fce210SBarry Smith PetscFunctionBegin; 194195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19429566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 19439566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 19443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 194595fce210SBarry Smith } 194695fce210SBarry Smith 194795fce210SBarry Smith /*@C 1948cab54364SBarry Smith PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 194995fce210SBarry Smith 195095fce210SBarry Smith Collective 195195fce210SBarry Smith 19524165533cSJose E. Roman Input Parameters: 195395fce210SBarry Smith + sf - star forest 195495fce210SBarry Smith . unit - data type 195595fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 195695fce210SBarry Smith 19574165533cSJose E. Roman Output Parameter: 195895fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 195995fce210SBarry Smith 196095fce210SBarry Smith Level: intermediate 196195fce210SBarry Smith 196220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 196395fce210SBarry Smith @*/ 1964d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1965d71ae5a4SJacob Faibussowitsch { 1966a5526d50SJunchao Zhang PetscSF multi = NULL; 196795fce210SBarry Smith 196895fce210SBarry Smith PetscFunctionBegin; 196995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19709566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 19719566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 19729566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 19733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 197495fce210SBarry Smith } 197595fce210SBarry Smith 197695fce210SBarry Smith /*@C 1977cab54364SBarry Smith PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 197895fce210SBarry Smith 197995fce210SBarry Smith Collective 198095fce210SBarry Smith 19814165533cSJose E. Roman Input Parameters: 198295fce210SBarry Smith + sf - star forest 198395fce210SBarry Smith . unit - data type 198495fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 198595fce210SBarry Smith 19864165533cSJose E. Roman Output Parameter: 198795fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 198895fce210SBarry Smith 198995fce210SBarry Smith Level: intermediate 199095fce210SBarry Smith 199120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 199295fce210SBarry Smith @*/ 1993d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1994d71ae5a4SJacob Faibussowitsch { 1995a5526d50SJunchao Zhang PetscSF multi = NULL; 199695fce210SBarry Smith 199795fce210SBarry Smith PetscFunctionBegin; 199895fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 19999566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 20009566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 20013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 200295fce210SBarry Smith } 2003a7b3aa13SAta Mesgarnejad 2004d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 2005d71ae5a4SJacob Faibussowitsch { 2006a072220fSLawrence Mitchell PetscInt i, n, nleaves; 2007a072220fSLawrence Mitchell const PetscInt *ilocal = NULL; 2008a072220fSLawrence Mitchell PetscHSetI seen; 2009a072220fSLawrence Mitchell 2010a072220fSLawrence Mitchell PetscFunctionBegin; 2011b458e8f1SJose E. Roman if (PetscDefined(USE_DEBUG)) { 20129566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 20139566063dSJacob Faibussowitsch PetscCall(PetscHSetICreate(&seen)); 2014a072220fSLawrence Mitchell for (i = 0; i < nleaves; i++) { 2015a072220fSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 20169566063dSJacob Faibussowitsch PetscCall(PetscHSetIAdd(seen, leaf)); 2017a072220fSLawrence Mitchell } 20189566063dSJacob Faibussowitsch PetscCall(PetscHSetIGetSize(seen, &n)); 201908401ef6SPierre Jolivet PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 20209566063dSJacob Faibussowitsch PetscCall(PetscHSetIDestroy(&seen)); 2021b458e8f1SJose E. Roman } 20223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2023a072220fSLawrence Mitchell } 202454729392SStefano Zampini 2025a7b3aa13SAta Mesgarnejad /*@ 2026cab54364SBarry Smith PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2027a7b3aa13SAta Mesgarnejad 2028a7b3aa13SAta Mesgarnejad Input Parameters: 2029cab54364SBarry Smith + sfA - The first `PetscSF` 2030cab54364SBarry Smith - sfB - The second `PetscSF` 2031a7b3aa13SAta Mesgarnejad 20322fe279fdSBarry Smith Output Parameter: 2033cab54364SBarry Smith . sfBA - The composite `PetscSF` 2034a7b3aa13SAta Mesgarnejad 2035a7b3aa13SAta Mesgarnejad Level: developer 2036a7b3aa13SAta Mesgarnejad 2037a072220fSLawrence Mitchell Notes: 2038cab54364SBarry Smith Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 203954729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. 204054729392SStefano Zampini 204120662ed9SBarry Smith `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 204220662ed9SBarry Smith a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 204320662ed9SBarry Smith nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 204420662ed9SBarry Smith `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 2045a072220fSLawrence Mitchell 2046db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2047a7b3aa13SAta Mesgarnejad @*/ 2048d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2049d71ae5a4SJacob Faibussowitsch { 2050a7b3aa13SAta Mesgarnejad const PetscSFNode *remotePointsA, *remotePointsB; 2051d41018fbSJunchao Zhang PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 205254729392SStefano Zampini const PetscInt *localPointsA, *localPointsB; 205354729392SStefano Zampini PetscInt *localPointsBA; 205454729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 205554729392SStefano Zampini PetscBool denseB; 2056a7b3aa13SAta Mesgarnejad 2057a7b3aa13SAta Mesgarnejad PetscFunctionBegin; 2058a7b3aa13SAta Mesgarnejad PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 205929046d53SLisandro Dalcin PetscSFCheckGraphSet(sfA, 1); 206029046d53SLisandro Dalcin PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 206129046d53SLisandro Dalcin PetscSFCheckGraphSet(sfB, 2); 206254729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2); 20634f572ea9SToby Isaac PetscAssertPointer(sfBA, 3); 20649566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 20659566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 206654729392SStefano Zampini 20679566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 20689566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 206920662ed9SBarry Smith /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 207020662ed9SBarry Smith numRootsB; otherwise, garbage will be broadcasted. 207120662ed9SBarry Smith Example (comm size = 1): 207220662ed9SBarry Smith sfA: 0 <- (0, 0) 207320662ed9SBarry Smith sfB: 100 <- (0, 0) 207420662ed9SBarry Smith 101 <- (0, 1) 207520662ed9SBarry Smith Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget 207620662ed9SBarry Smith of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would 207720662ed9SBarry Smith receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 207820662ed9SBarry Smith remotePointsA; if not recasted, point 101 would receive a garbage value. */ 20799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 208054729392SStefano Zampini for (i = 0; i < numRootsB; i++) { 208154729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 208254729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 208354729392SStefano Zampini } 208454729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 20850ea77edaSksagiyam PetscInt localp = localPointsA ? localPointsA[i] : i; 20860ea77edaSksagiyam 20870ea77edaSksagiyam if (localp >= numRootsB) continue; 20880ea77edaSksagiyam reorderedRemotePointsA[localp] = remotePointsA[i]; 208954729392SStefano Zampini } 2090d41018fbSJunchao Zhang remotePointsA = reorderedRemotePointsA; 20919566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 20929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 20930ea77edaSksagiyam for (i = 0; i < maxleaf - minleaf + 1; i++) { 20940ea77edaSksagiyam leafdataB[i].rank = -1; 20950ea77edaSksagiyam leafdataB[i].index = -1; 20960ea77edaSksagiyam } 20976497c311SBarry Smith PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 20986497c311SBarry Smith PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 20999566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA)); 2100d41018fbSJunchao Zhang 210154729392SStefano Zampini denseB = (PetscBool)!localPointsB; 210254729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 210354729392SStefano Zampini if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 210454729392SStefano Zampini else numLeavesBA++; 210554729392SStefano Zampini } 210654729392SStefano Zampini if (denseB) { 2107d41018fbSJunchao Zhang localPointsBA = NULL; 2108d41018fbSJunchao Zhang remotePointsBA = leafdataB; 2109d41018fbSJunchao Zhang } else { 21109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 21119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 211254729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 211354729392SStefano Zampini const PetscInt l = localPointsB ? localPointsB[i] : i; 211454729392SStefano Zampini 211554729392SStefano Zampini if (leafdataB[l - minleaf].rank == -1) continue; 211654729392SStefano Zampini remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 211754729392SStefano Zampini localPointsBA[numLeavesBA] = l; 211854729392SStefano Zampini numLeavesBA++; 211954729392SStefano Zampini } 21209566063dSJacob Faibussowitsch PetscCall(PetscFree(leafdataB)); 2121d41018fbSJunchao Zhang } 21229566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 21239566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA)); 21249566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 21253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2126a7b3aa13SAta Mesgarnejad } 21271c6ba672SJunchao Zhang 212804c0ada0SJunchao Zhang /*@ 2129cab54364SBarry Smith PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 213004c0ada0SJunchao Zhang 213104c0ada0SJunchao Zhang Input Parameters: 2132cab54364SBarry Smith + sfA - The first `PetscSF` 2133cab54364SBarry Smith - sfB - The second `PetscSF` 213404c0ada0SJunchao Zhang 21352fe279fdSBarry Smith Output Parameter: 2136cab54364SBarry Smith . sfBA - The composite `PetscSF`. 213704c0ada0SJunchao Zhang 213804c0ada0SJunchao Zhang Level: developer 213904c0ada0SJunchao Zhang 214054729392SStefano Zampini Notes: 214120662ed9SBarry Smith Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 214254729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 214320662ed9SBarry Smith second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 214454729392SStefano Zampini 214520662ed9SBarry Smith `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 214620662ed9SBarry Smith a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 214720662ed9SBarry Smith roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 214820662ed9SBarry Smith on `sfA`, then 214920662ed9SBarry Smith a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 215054729392SStefano Zampini 2151db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 215204c0ada0SJunchao Zhang @*/ 2153d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2154d71ae5a4SJacob Faibussowitsch { 215504c0ada0SJunchao Zhang const PetscSFNode *remotePointsA, *remotePointsB; 215604c0ada0SJunchao Zhang PetscSFNode *remotePointsBA; 215704c0ada0SJunchao Zhang const PetscInt *localPointsA, *localPointsB; 215854729392SStefano Zampini PetscSFNode *reorderedRemotePointsA = NULL; 215954729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 21605b0d146aSStefano Zampini MPI_Op op; 21615b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 21625b0d146aSStefano Zampini PetscBool iswin; 21635b0d146aSStefano Zampini #endif 216404c0ada0SJunchao Zhang 216504c0ada0SJunchao Zhang PetscFunctionBegin; 216604c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 216704c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfA, 1); 216804c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 216904c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfB, 2); 217054729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2); 21714f572ea9SToby Isaac PetscAssertPointer(sfBA, 3); 21729566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 21739566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 217454729392SStefano Zampini 21759566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 21769566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 21775b0d146aSStefano Zampini 21785b0d146aSStefano Zampini /* TODO: Check roots of sfB have degree of 1 */ 21795b0d146aSStefano Zampini /* Once we implement it, we can replace the MPI_MAXLOC 218083df288dSJunchao Zhang with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 21815b0d146aSStefano Zampini We use MPI_MAXLOC only to have a deterministic output from this routine if 21825b0d146aSStefano Zampini the root condition is not meet. 21835b0d146aSStefano Zampini */ 21845b0d146aSStefano Zampini op = MPI_MAXLOC; 21855b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 21865b0d146aSStefano Zampini /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 21879566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 218883df288dSJunchao Zhang if (iswin) op = MPI_REPLACE; 21895b0d146aSStefano Zampini #endif 21905b0d146aSStefano Zampini 21919566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 21929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 219354729392SStefano Zampini for (i = 0; i < maxleaf - minleaf + 1; i++) { 219454729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 219554729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 219654729392SStefano Zampini } 219754729392SStefano Zampini if (localPointsA) { 219854729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 219954729392SStefano Zampini if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 220054729392SStefano Zampini reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 220154729392SStefano Zampini } 220254729392SStefano Zampini } else { 220354729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 220454729392SStefano Zampini if (i > maxleaf || i < minleaf) continue; 220554729392SStefano Zampini reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 220654729392SStefano Zampini } 220754729392SStefano Zampini } 220854729392SStefano Zampini 22099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 22109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 221154729392SStefano Zampini for (i = 0; i < numRootsB; i++) { 221254729392SStefano Zampini remotePointsBA[i].rank = -1; 221354729392SStefano Zampini remotePointsBA[i].index = -1; 221454729392SStefano Zampini } 221554729392SStefano Zampini 22166497c311SBarry Smith PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 22176497c311SBarry Smith PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 22189566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA)); 221954729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 222054729392SStefano Zampini if (remotePointsBA[i].rank == -1) continue; 222154729392SStefano Zampini remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 222254729392SStefano Zampini remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 222354729392SStefano Zampini localPointsBA[numLeavesBA] = i; 222454729392SStefano Zampini numLeavesBA++; 222554729392SStefano Zampini } 22269566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 22279566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA)); 22289566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 22293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 223004c0ada0SJunchao Zhang } 223104c0ada0SJunchao Zhang 22321c6ba672SJunchao Zhang /* 2233cab54364SBarry Smith PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 22341c6ba672SJunchao Zhang 22352fe279fdSBarry Smith Input Parameter: 2236cab54364SBarry Smith . sf - The global `PetscSF` 22371c6ba672SJunchao Zhang 22382fe279fdSBarry Smith Output Parameter: 2239cab54364SBarry Smith . out - The local `PetscSF` 2240cab54364SBarry Smith 2241cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()` 22421c6ba672SJunchao Zhang */ 2243d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2244d71ae5a4SJacob Faibussowitsch { 22451c6ba672SJunchao Zhang MPI_Comm comm; 22461c6ba672SJunchao Zhang PetscMPIInt myrank; 22471c6ba672SJunchao Zhang const PetscInt *ilocal; 22481c6ba672SJunchao Zhang const PetscSFNode *iremote; 22491c6ba672SJunchao Zhang PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 22501c6ba672SJunchao Zhang PetscSFNode *liremote; 22511c6ba672SJunchao Zhang PetscSF lsf; 22521c6ba672SJunchao Zhang 22531c6ba672SJunchao Zhang PetscFunctionBegin; 22541c6ba672SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2255dbbe0bcdSBarry Smith if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2256dbbe0bcdSBarry Smith else { 22571c6ba672SJunchao Zhang /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 22589566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 22599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 22601c6ba672SJunchao Zhang 22611c6ba672SJunchao Zhang /* Find out local edges and build a local SF */ 22629566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 22639371c9d4SSatish Balay for (i = lnleaves = 0; i < nleaves; i++) { 22649371c9d4SSatish Balay if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 22659371c9d4SSatish Balay } 22669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &lilocal)); 22679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &liremote)); 22681c6ba672SJunchao Zhang 22691c6ba672SJunchao Zhang for (i = j = 0; i < nleaves; i++) { 22701c6ba672SJunchao Zhang if (iremote[i].rank == (PetscInt)myrank) { 22711c6ba672SJunchao Zhang lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 22721c6ba672SJunchao Zhang liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 22731c6ba672SJunchao Zhang liremote[j].index = iremote[i].index; 22741c6ba672SJunchao Zhang j++; 22751c6ba672SJunchao Zhang } 22761c6ba672SJunchao Zhang } 22779566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 22789566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(lsf)); 22799566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 22809566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 22811c6ba672SJunchao Zhang *out = lsf; 22821c6ba672SJunchao Zhang } 22833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22841c6ba672SJunchao Zhang } 2285dd5b3ca6SJunchao Zhang 2286dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2287d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2288d71ae5a4SJacob Faibussowitsch { 2289eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 2290dd5b3ca6SJunchao Zhang 2291dd5b3ca6SJunchao Zhang PetscFunctionBegin; 2292dd5b3ca6SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 22939566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 22949566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 22959566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 22969566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2297dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 22989566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 22993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2300dd5b3ca6SJunchao Zhang } 2301dd5b3ca6SJunchao Zhang 2302157edd7aSVaclav Hapla /*@ 2303cab54364SBarry Smith PetscSFConcatenate - concatenate multiple `PetscSF` into one 2304157edd7aSVaclav Hapla 2305157edd7aSVaclav Hapla Input Parameters: 2306157edd7aSVaclav Hapla + comm - the communicator 2307cab54364SBarry Smith . nsfs - the number of input `PetscSF` 2308cab54364SBarry Smith . sfs - the array of input `PetscSF` 23091f40158dSVaclav Hapla . rootMode - the root mode specifying how roots are handled 231020662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2311157edd7aSVaclav Hapla 23122fe279fdSBarry Smith Output Parameter: 2313cab54364SBarry Smith . newsf - The resulting `PetscSF` 2314157edd7aSVaclav Hapla 23151f40158dSVaclav Hapla Level: advanced 2316157edd7aSVaclav Hapla 2317157edd7aSVaclav Hapla Notes: 231820662ed9SBarry Smith The communicator of all `PetscSF`s in `sfs` must be comm. 2319157edd7aSVaclav Hapla 232020662ed9SBarry Smith Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 232120662ed9SBarry Smith 232220662ed9SBarry Smith The offsets in `leafOffsets` are added to the original leaf indices. 232320662ed9SBarry Smith 232420662ed9SBarry Smith If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 232520662ed9SBarry Smith In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 232620662ed9SBarry Smith 232720662ed9SBarry Smith If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2328157edd7aSVaclav Hapla In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2329157edd7aSVaclav Hapla 233020662ed9SBarry Smith All root modes retain the essential connectivity condition. 233120662ed9SBarry Smith If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 233220662ed9SBarry Smith Parameter `rootMode` controls how the input root spaces are combined. 233320662ed9SBarry Smith For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 233420662ed9SBarry Smith and is also the same in the output `PetscSF`. 23351f40158dSVaclav Hapla For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 23361f40158dSVaclav Hapla `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 233720662ed9SBarry Smith roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 23381f40158dSVaclav Hapla `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 23391593df67SStefano Zampini roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 23401f40158dSVaclav Hapla the original root ranks are ignored. 23411f40158dSVaclav Hapla For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 234220662ed9SBarry Smith the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 234320662ed9SBarry Smith to keep the load balancing. 234420662ed9SBarry Smith However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 23451f40158dSVaclav Hapla 23461f40158dSVaclav Hapla Example: 23471f40158dSVaclav Hapla We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 234820662ed9SBarry Smith .vb 234920662ed9SBarry Smith make -C $PETSC_DIR/src/vec/is/sf/tests ex18 235020662ed9SBarry Smith for m in {local,global,shared}; do 235120662ed9SBarry Smith mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 235220662ed9SBarry Smith done 235320662ed9SBarry Smith .ve 235420662ed9SBarry Smith we generate two identical `PetscSF`s sf_0 and sf_1, 235520662ed9SBarry Smith .vb 235620662ed9SBarry Smith PetscSF Object: sf_0 2 MPI processes 235720662ed9SBarry Smith type: basic 235820662ed9SBarry Smith rank #leaves #roots 235920662ed9SBarry Smith [ 0] 4 2 236020662ed9SBarry Smith [ 1] 4 2 236120662ed9SBarry Smith leaves roots roots in global numbering 236220662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 236320662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 236420662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 2 236520662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 3 236620662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 236720662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 236820662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 2 236920662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 3 237020662ed9SBarry Smith .ve 2371e33f79d8SJacob Faibussowitsch and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 237220662ed9SBarry Smith .vb 237320662ed9SBarry Smith rootMode = local: 237420662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes 237520662ed9SBarry Smith type: basic 237620662ed9SBarry Smith rank #leaves #roots 237720662ed9SBarry Smith [ 0] 8 4 237820662ed9SBarry Smith [ 1] 8 4 237920662ed9SBarry Smith leaves roots roots in global numbering 238020662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 238120662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 238220662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 4 238320662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 5 238420662ed9SBarry Smith ( 0, 4) <- ( 0, 2) = 2 238520662ed9SBarry Smith ( 0, 5) <- ( 0, 3) = 3 238620662ed9SBarry Smith ( 0, 6) <- ( 1, 2) = 6 238720662ed9SBarry Smith ( 0, 7) <- ( 1, 3) = 7 238820662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 238920662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 239020662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 4 239120662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 5 239220662ed9SBarry Smith ( 1, 4) <- ( 0, 2) = 2 239320662ed9SBarry Smith ( 1, 5) <- ( 0, 3) = 3 239420662ed9SBarry Smith ( 1, 6) <- ( 1, 2) = 6 239520662ed9SBarry Smith ( 1, 7) <- ( 1, 3) = 7 239620662ed9SBarry Smith 239720662ed9SBarry Smith rootMode = global: 239820662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes 239920662ed9SBarry Smith type: basic 240020662ed9SBarry Smith rank #leaves #roots 240120662ed9SBarry Smith [ 0] 8 4 240220662ed9SBarry Smith [ 1] 8 4 240320662ed9SBarry Smith leaves roots roots in global numbering 240420662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 240520662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 240620662ed9SBarry Smith ( 0, 2) <- ( 0, 2) = 2 240720662ed9SBarry Smith ( 0, 3) <- ( 0, 3) = 3 240820662ed9SBarry Smith ( 0, 4) <- ( 1, 0) = 4 240920662ed9SBarry Smith ( 0, 5) <- ( 1, 1) = 5 241020662ed9SBarry Smith ( 0, 6) <- ( 1, 2) = 6 241120662ed9SBarry Smith ( 0, 7) <- ( 1, 3) = 7 241220662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 241320662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 241420662ed9SBarry Smith ( 1, 2) <- ( 0, 2) = 2 241520662ed9SBarry Smith ( 1, 3) <- ( 0, 3) = 3 241620662ed9SBarry Smith ( 1, 4) <- ( 1, 0) = 4 241720662ed9SBarry Smith ( 1, 5) <- ( 1, 1) = 5 241820662ed9SBarry Smith ( 1, 6) <- ( 1, 2) = 6 241920662ed9SBarry Smith ( 1, 7) <- ( 1, 3) = 7 242020662ed9SBarry Smith 242120662ed9SBarry Smith rootMode = shared: 242220662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes 242320662ed9SBarry Smith type: basic 242420662ed9SBarry Smith rank #leaves #roots 242520662ed9SBarry Smith [ 0] 8 2 242620662ed9SBarry Smith [ 1] 8 2 242720662ed9SBarry Smith leaves roots roots in global numbering 242820662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0 242920662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1 243020662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 2 243120662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 3 243220662ed9SBarry Smith ( 0, 4) <- ( 0, 0) = 0 243320662ed9SBarry Smith ( 0, 5) <- ( 0, 1) = 1 243420662ed9SBarry Smith ( 0, 6) <- ( 1, 0) = 2 243520662ed9SBarry Smith ( 0, 7) <- ( 1, 1) = 3 243620662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0 243720662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1 243820662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 2 243920662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 3 244020662ed9SBarry Smith ( 1, 4) <- ( 0, 0) = 0 244120662ed9SBarry Smith ( 1, 5) <- ( 0, 1) = 1 244220662ed9SBarry Smith ( 1, 6) <- ( 1, 0) = 2 244320662ed9SBarry Smith ( 1, 7) <- ( 1, 1) = 3 244420662ed9SBarry Smith .ve 24451f40158dSVaclav Hapla 24461f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2447157edd7aSVaclav Hapla @*/ 24481f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2449d71ae5a4SJacob Faibussowitsch { 2450157edd7aSVaclav Hapla PetscInt i, s, nLeaves, nRoots; 2451157edd7aSVaclav Hapla PetscInt *leafArrayOffsets; 2452157edd7aSVaclav Hapla PetscInt *ilocal_new; 2453157edd7aSVaclav Hapla PetscSFNode *iremote_new; 2454157edd7aSVaclav Hapla PetscBool all_ilocal_null = PETSC_FALSE; 24551f40158dSVaclav Hapla PetscLayout glayout = NULL; 24561f40158dSVaclav Hapla PetscInt *gremote = NULL; 24571f40158dSVaclav Hapla PetscMPIInt rank, size; 2458157edd7aSVaclav Hapla 2459157edd7aSVaclav Hapla PetscFunctionBegin; 246012f479c1SVaclav Hapla if (PetscDefined(USE_DEBUG)) { 2461157edd7aSVaclav Hapla PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2462157edd7aSVaclav Hapla 24639566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &dummy)); 2464157edd7aSVaclav Hapla PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 24654f572ea9SToby Isaac PetscAssertPointer(sfs, 3); 2466157edd7aSVaclav Hapla for (i = 0; i < nsfs; i++) { 2467157edd7aSVaclav Hapla PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2468157edd7aSVaclav Hapla PetscCheckSameComm(dummy, 1, sfs[i], 3); 2469157edd7aSVaclav Hapla } 24701f40158dSVaclav Hapla PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 24714f572ea9SToby Isaac if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 24724f572ea9SToby Isaac PetscAssertPointer(newsf, 6); 24739566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&dummy)); 2474157edd7aSVaclav Hapla } 2475157edd7aSVaclav Hapla if (!nsfs) { 24769566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf)); 24779566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 24783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2479157edd7aSVaclav Hapla } 24809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 24811f40158dSVaclav Hapla PetscCallMPI(MPI_Comm_size(comm, &size)); 2482157edd7aSVaclav Hapla 24831f40158dSVaclav Hapla /* Calculate leaf array offsets */ 24849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2485157edd7aSVaclav Hapla leafArrayOffsets[0] = 0; 2486157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2487157edd7aSVaclav Hapla PetscInt nl; 2488157edd7aSVaclav Hapla 24899566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2490157edd7aSVaclav Hapla leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2491157edd7aSVaclav Hapla } 2492157edd7aSVaclav Hapla nLeaves = leafArrayOffsets[nsfs]; 2493157edd7aSVaclav Hapla 24941f40158dSVaclav Hapla /* Calculate number of roots */ 24951f40158dSVaclav Hapla switch (rootMode) { 24961f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 24971f40158dSVaclav Hapla PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 24981f40158dSVaclav Hapla if (PetscDefined(USE_DEBUG)) { 24991f40158dSVaclav Hapla for (s = 1; s < nsfs; s++) { 25001f40158dSVaclav Hapla PetscInt nr; 25011f40158dSVaclav Hapla 25021f40158dSVaclav Hapla PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 25031f40158dSVaclav Hapla PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 25041f40158dSVaclav Hapla } 25051f40158dSVaclav Hapla } 25061f40158dSVaclav Hapla } break; 25071f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 25081f40158dSVaclav Hapla /* Calculate also global layout in this case */ 25091f40158dSVaclav Hapla PetscInt *nls; 25101f40158dSVaclav Hapla PetscLayout *lts; 25111f40158dSVaclav Hapla PetscInt **inds; 25121f40158dSVaclav Hapla PetscInt j; 25131f40158dSVaclav Hapla PetscInt rootOffset = 0; 25141f40158dSVaclav Hapla 25151f40158dSVaclav Hapla PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 25161f40158dSVaclav Hapla PetscCall(PetscLayoutCreate(comm, &glayout)); 25171f40158dSVaclav Hapla glayout->bs = 1; 25181f40158dSVaclav Hapla glayout->n = 0; 25191f40158dSVaclav Hapla glayout->N = 0; 25201f40158dSVaclav Hapla for (s = 0; s < nsfs; s++) { 25211f40158dSVaclav Hapla PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 25221f40158dSVaclav Hapla glayout->n += lts[s]->n; 25231f40158dSVaclav Hapla glayout->N += lts[s]->N; 25241f40158dSVaclav Hapla } 25251f40158dSVaclav Hapla PetscCall(PetscLayoutSetUp(glayout)); 25261f40158dSVaclav Hapla PetscCall(PetscMalloc1(nLeaves, &gremote)); 25271f40158dSVaclav Hapla for (s = 0, j = 0; s < nsfs; s++) { 25281f40158dSVaclav Hapla for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 25291f40158dSVaclav Hapla rootOffset += lts[s]->N; 25301f40158dSVaclav Hapla PetscCall(PetscLayoutDestroy(<s[s])); 25311f40158dSVaclav Hapla PetscCall(PetscFree(inds[s])); 25321f40158dSVaclav Hapla } 25331f40158dSVaclav Hapla PetscCall(PetscFree3(lts, nls, inds)); 25341f40158dSVaclav Hapla nRoots = glayout->N; 25351f40158dSVaclav Hapla } break; 25361f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 25371f40158dSVaclav Hapla /* nRoots calculated later in this case */ 25381f40158dSVaclav Hapla break; 25391f40158dSVaclav Hapla default: 25401f40158dSVaclav Hapla SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 25411f40158dSVaclav Hapla } 25421f40158dSVaclav Hapla 2543157edd7aSVaclav Hapla if (!leafOffsets) { 2544157edd7aSVaclav Hapla all_ilocal_null = PETSC_TRUE; 2545157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2546157edd7aSVaclav Hapla const PetscInt *ilocal; 2547157edd7aSVaclav Hapla 25489566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2549157edd7aSVaclav Hapla if (ilocal) { 2550157edd7aSVaclav Hapla all_ilocal_null = PETSC_FALSE; 2551157edd7aSVaclav Hapla break; 2552157edd7aSVaclav Hapla } 2553157edd7aSVaclav Hapla } 2554157edd7aSVaclav Hapla PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2555157edd7aSVaclav Hapla } 2556157edd7aSVaclav Hapla 2557157edd7aSVaclav Hapla /* Renumber and concatenate local leaves */ 2558157edd7aSVaclav Hapla ilocal_new = NULL; 2559157edd7aSVaclav Hapla if (!all_ilocal_null) { 25609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2561157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2562157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2563157edd7aSVaclav Hapla const PetscInt *ilocal; 25648e3a54c0SPierre Jolivet PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2565157edd7aSVaclav Hapla PetscInt i, nleaves_l; 2566157edd7aSVaclav Hapla 25679566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2568157edd7aSVaclav Hapla for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2569157edd7aSVaclav Hapla } 2570157edd7aSVaclav Hapla } 2571157edd7aSVaclav Hapla 2572157edd7aSVaclav Hapla /* Renumber and concatenate remote roots */ 25731f40158dSVaclav Hapla if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 25741f40158dSVaclav Hapla PetscInt rootOffset = 0; 25751f40158dSVaclav Hapla 25769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2577157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) { 2578157edd7aSVaclav Hapla iremote_new[i].rank = -1; 2579157edd7aSVaclav Hapla iremote_new[i].index = -1; 2580157edd7aSVaclav Hapla } 2581157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2582157edd7aSVaclav Hapla PetscInt i, nl, nr; 2583157edd7aSVaclav Hapla PetscSF tmp_sf; 2584157edd7aSVaclav Hapla const PetscSFNode *iremote; 2585157edd7aSVaclav Hapla PetscSFNode *tmp_rootdata; 25868e3a54c0SPierre Jolivet PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2587157edd7aSVaclav Hapla 25889566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 25899566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &tmp_sf)); 2590157edd7aSVaclav Hapla /* create helper SF with contiguous leaves */ 25919566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 25929566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(tmp_sf)); 25939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 25941f40158dSVaclav Hapla if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2595157edd7aSVaclav Hapla for (i = 0; i < nr; i++) { 25961f40158dSVaclav Hapla tmp_rootdata[i].index = i + rootOffset; 25976497c311SBarry Smith tmp_rootdata[i].rank = rank; 2598157edd7aSVaclav Hapla } 25991f40158dSVaclav Hapla rootOffset += nr; 26001f40158dSVaclav Hapla } else { 26011f40158dSVaclav Hapla for (i = 0; i < nr; i++) { 26021f40158dSVaclav Hapla tmp_rootdata[i].index = i; 26036497c311SBarry Smith tmp_rootdata[i].rank = rank; 26041f40158dSVaclav Hapla } 26051f40158dSVaclav Hapla } 26066497c311SBarry Smith PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 26076497c311SBarry Smith PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 26089566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&tmp_sf)); 26099566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp_rootdata)); 2610157edd7aSVaclav Hapla } 2611aa624791SPierre Jolivet if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2612157edd7aSVaclav Hapla 2613157edd7aSVaclav Hapla /* Build the new SF */ 26149566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf)); 26159566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 26161f40158dSVaclav Hapla } else { 26171f40158dSVaclav Hapla /* Build the new SF */ 26181f40158dSVaclav Hapla PetscCall(PetscSFCreate(comm, newsf)); 26191f40158dSVaclav Hapla PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 26201f40158dSVaclav Hapla } 26219566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(*newsf)); 26221f40158dSVaclav Hapla PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 26231f40158dSVaclav Hapla PetscCall(PetscLayoutDestroy(&glayout)); 26241f40158dSVaclav Hapla PetscCall(PetscFree(gremote)); 26259566063dSJacob Faibussowitsch PetscCall(PetscFree(leafArrayOffsets)); 26263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2627157edd7aSVaclav Hapla } 26288e54d7e8SToby Isaac 26298e54d7e8SToby Isaac /*@ 26308e54d7e8SToby Isaac PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 26318e54d7e8SToby Isaac 26328e54d7e8SToby Isaac Collective 26338e54d7e8SToby Isaac 26348e54d7e8SToby Isaac Input Parameters: 26358e54d7e8SToby Isaac + sf - star forest 26368e54d7e8SToby Isaac . unit - the data type contained within the root and leaf data 2637d7c1f440SPierre Jolivet . rootdata - root data that will be used for multiple PetscSF communications 2638d7c1f440SPierre Jolivet - leafdata - leaf data that will be used for multiple PetscSF communications 26398e54d7e8SToby Isaac 26408e54d7e8SToby Isaac Level: advanced 26418e54d7e8SToby Isaac 26428e54d7e8SToby Isaac Notes: 26438e54d7e8SToby Isaac Implementations of `PetscSF` can make optimizations 26448e54d7e8SToby Isaac for repeated communication using the same memory regions, but these optimizations 26458e54d7e8SToby Isaac can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 26468e54d7e8SToby Isaac The intended pattern is 26478e54d7e8SToby Isaac 26488e54d7e8SToby Isaac .vb 26498e54d7e8SToby Isaac PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 26508e54d7e8SToby Isaac 26518e54d7e8SToby Isaac PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 26528e54d7e8SToby Isaac // repeated use of rootdata and leafdata will now be optimized 26538e54d7e8SToby Isaac 26548e54d7e8SToby Isaac PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 26558e54d7e8SToby Isaac PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 26568e54d7e8SToby Isaac // ... 26578e54d7e8SToby Isaac PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 26588e54d7e8SToby Isaac PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 26598e54d7e8SToby Isaac // ... (other communications) 26608e54d7e8SToby Isaac 26618e54d7e8SToby Isaac // rootdata and leafdata must be deregistered before freeing 26628e54d7e8SToby Isaac // skipping this can lead to undefined behavior including 26638e54d7e8SToby Isaac // deadlocks 26648e54d7e8SToby Isaac PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 26658e54d7e8SToby Isaac 26668e54d7e8SToby Isaac // it is now safe to free rootdata and leafdata 26678e54d7e8SToby Isaac PetscFree2(rootdata, leafdata); 26688e54d7e8SToby Isaac .ve 26698e54d7e8SToby Isaac 26708e54d7e8SToby Isaac If you do not register `rootdata` and `leafdata` it will not cause an error, 26718e54d7e8SToby Isaac but optimizations that reduce the setup time for each communication cannot be 26728e54d7e8SToby Isaac made. Currently, the only implementation of `PetscSF` that benefits from 26738e54d7e8SToby Isaac `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 26748e54d7e8SToby Isaac `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 26758e54d7e8SToby Isaac 26768e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 26778e54d7e8SToby Isaac @*/ 26788e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 26798e54d7e8SToby Isaac { 26808e54d7e8SToby Isaac PetscFunctionBegin; 26818e54d7e8SToby Isaac PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 26828e54d7e8SToby Isaac PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 26838e54d7e8SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 26848e54d7e8SToby Isaac } 26858e54d7e8SToby Isaac 26868e54d7e8SToby Isaac /*@ 26878e54d7e8SToby Isaac PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded. 26888e54d7e8SToby Isaac 26898e54d7e8SToby Isaac Collective 26908e54d7e8SToby Isaac 26918e54d7e8SToby Isaac Input Parameters: 26928e54d7e8SToby Isaac + sf - star forest 26938e54d7e8SToby Isaac . unit - the data type contained within the root and leaf data 26948e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 26958e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 26968e54d7e8SToby Isaac 26978e54d7e8SToby Isaac Level: advanced 26988e54d7e8SToby Isaac 26998e54d7e8SToby Isaac Note: 27008e54d7e8SToby Isaac See `PetscSFRegisterPersistent()` for when/how to use this function. 27018e54d7e8SToby Isaac 27028e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 27038e54d7e8SToby Isaac @*/ 27048e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 27058e54d7e8SToby Isaac { 27068e54d7e8SToby Isaac PetscFunctionBegin; 27078e54d7e8SToby Isaac PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 27088e54d7e8SToby Isaac PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 27098e54d7e8SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 27108e54d7e8SToby Isaac } 2711e1187f0dSToby Isaac 2712e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2713e1187f0dSToby Isaac { 2714e1187f0dSToby Isaac MPI_Aint lb, lb_true, bytes, bytes_true; 2715e1187f0dSToby Isaac 2716e1187f0dSToby Isaac PetscFunctionBegin; 2717e1187f0dSToby Isaac PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2718e1187f0dSToby Isaac PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2719e1187f0dSToby Isaac PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2720e1187f0dSToby Isaac *size = bytes; 2721e1187f0dSToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 2722e1187f0dSToby Isaac } 2723