xref: /petsc/src/vec/is/sf/interface/sf.c (revision e1187f0d1609b624b33bdbbdcab43a1c99b70a0c)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
77fd2d3dbSJunchao Zhang   #include <cuda_runtime.h>
8715b587bSJunchao Zhang   #include <petscdevice_cuda.h>
97fd2d3dbSJunchao Zhang #endif
107fd2d3dbSJunchao Zhang 
117fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
127fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
137fd2d3dbSJunchao Zhang #endif
147fd2d3dbSJunchao Zhang 
/*
  PetscSFCheckGraphSet(sf, arg) - guard for routines that require the graph of `sf` to have been set

  With PETSC_USE_DEBUG it raises PETSC_ERR_ARG_WRONGSTATE when sf->graphset is false; `arg` is only used in the
  error message (printed as the argument number), together with the stringified `sf` expression and the name of
  the calling function. Without PETSC_USE_DEBUG it expands to an empty statement.
  When PETSC_CLANG_STATIC_ANALYZER is defined only a function prototype is declared instead of the macro.
*/
#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
  #else
    /* Optimized builds: no check, but still a single well-formed statement */
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif
2695fce210SBarry Smith 
/*
  String tables for the duplicate-option and concatenate-root-mode enums. Each table lists the value names,
  followed by the enum type name, a constant-name prefix, and a terminating NULL.
  NOTE(review): this looks like the layout PETSc's enum option handling expects - confirm against PetscOptionsEnum().
*/
const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
2995fce210SBarry Smith 
308af6ec1cSBarry Smith /*@
3195fce210SBarry Smith   PetscSFCreate - create a star forest communication context
3295fce210SBarry Smith 
33d083f849SBarry Smith   Collective
3495fce210SBarry Smith 
354165533cSJose E. Roman   Input Parameter:
3695fce210SBarry Smith . comm - communicator on which the star forest will operate
3795fce210SBarry Smith 
384165533cSJose E. Roman   Output Parameter:
3995fce210SBarry Smith . sf - new star forest context
4095fce210SBarry Smith 
4120662ed9SBarry Smith   Options Database Key:
426677b1c1SJunchao Zhang + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
436677b1c1SJunchao Zhang . -sf_type window                - Use MPI-3 one-sided window for communication
446677b1c1SJunchao Zhang . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
456677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46dd5b3ca6SJunchao Zhang 
4795fce210SBarry Smith   Level: intermediate
4895fce210SBarry Smith 
49cab54364SBarry Smith   Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, or
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `SF`s are optimized and have better performance than the general `SF`s.
53dd5b3ca6SJunchao Zhang 
.seealso: `PetscSF`, `PetscSFSetType()`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5595fce210SBarry Smith @*/
56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57d71ae5a4SJacob Faibussowitsch {
5895fce210SBarry Smith   PetscSF b;
5995fce210SBarry Smith 
6095fce210SBarry Smith   PetscFunctionBegin;
614f572ea9SToby Isaac   PetscAssertPointer(sf, 2);
629566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6395fce210SBarry Smith 
649566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6595fce210SBarry Smith   b->nroots    = -1;
6695fce210SBarry Smith   b->nleaves   = -1;
6729046d53SLisandro Dalcin   b->minleaf   = PETSC_MAX_INT;
6829046d53SLisandro Dalcin   b->maxleaf   = PETSC_MIN_INT;
6995fce210SBarry Smith   b->nranks    = -1;
7095fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7195fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7295fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7395fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7520c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7620c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7771438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7827f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
7920c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
8027f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8127f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8259af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8359af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8420c24465SJunchao Zhang   #endif
8571438e86SJunchao Zhang 
8671438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8771438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8871438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
899566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
909566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9171438e86SJunchao Zhang   #endif
9220c24465SJunchao Zhang #endif
9360c22052SBarry Smith   b->vscat.from_n = -1;
9460c22052SBarry Smith   b->vscat.to_n   = -1;
9560c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9695fce210SBarry Smith   *sf             = b;
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9895fce210SBarry Smith }
9995fce210SBarry Smith 
10029046d53SLisandro Dalcin /*@
10195fce210SBarry Smith   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10295fce210SBarry Smith 
10395fce210SBarry Smith   Collective
10495fce210SBarry Smith 
1054165533cSJose E. Roman   Input Parameter:
10695fce210SBarry Smith . sf - star forest
10795fce210SBarry Smith 
10895fce210SBarry Smith   Level: advanced
10995fce210SBarry Smith 
110cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11195fce210SBarry Smith @*/
112d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
113d71ae5a4SJacob Faibussowitsch {
11495fce210SBarry Smith   PetscFunctionBegin;
11595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
116dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
1170dd791a8SStefano Zampini   PetscCall(PetscSFDestroy(&sf->rankssf));
1180dd791a8SStefano Zampini 
11929046d53SLisandro Dalcin   sf->nroots   = -1;
12029046d53SLisandro Dalcin   sf->nleaves  = -1;
12129046d53SLisandro Dalcin   sf->minleaf  = PETSC_MAX_INT;
12229046d53SLisandro Dalcin   sf->maxleaf  = PETSC_MIN_INT;
12395fce210SBarry Smith   sf->mine     = NULL;
12495fce210SBarry Smith   sf->remote   = NULL;
12529046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1279566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12821c688dcSJed Brown   sf->nranks = -1;
1299566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
13029046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1319566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1329566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1339566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1340dd791a8SStefano Zampini 
135013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1369566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1370dd791a8SStefano Zampini 
1389566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
13971438e86SJunchao Zhang 
14071438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1419566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14271438e86SJunchao Zhang #endif
14371438e86SJunchao Zhang 
14495fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
1453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14695fce210SBarry Smith }
14795fce210SBarry Smith 
148cc4c1da9SBarry Smith /*@
149cab54364SBarry Smith   PetscSFSetType - Set the `PetscSF` communication implementation
15095fce210SBarry Smith 
151c3339decSBarry Smith   Collective
15295fce210SBarry Smith 
15395fce210SBarry Smith   Input Parameters:
154cab54364SBarry Smith + sf   - the `PetscSF` context
15595fce210SBarry Smith - type - a known method
156cab54364SBarry Smith .vb
157cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
158cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159cab54364SBarry Smith .ve
16095fce210SBarry Smith 
16195fce210SBarry Smith   Options Database Key:
. -sf_type <type> - Sets the method, for example `basic` or `window`. Use `-help` for a list of available methods.
163cab54364SBarry Smith 
164cab54364SBarry Smith   Level: intermediate
16595fce210SBarry Smith 
16695fce210SBarry Smith   Notes:
16720662ed9SBarry Smith   See `PetscSFType` for possible values
16895fce210SBarry Smith 
16920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
17095fce210SBarry Smith @*/
171d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172d71ae5a4SJacob Faibussowitsch {
17395fce210SBarry Smith   PetscBool match;
1745f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17595fce210SBarry Smith 
17695fce210SBarry Smith   PetscFunctionBegin;
17795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1784f572ea9SToby Isaac   PetscAssertPointer(type, 2);
17995fce210SBarry Smith 
1809566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1813ba16761SJacob Faibussowitsch   if (match) PetscFunctionReturn(PETSC_SUCCESS);
18295fce210SBarry Smith 
1839566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1846adde796SStefano Zampini   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18529046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
186dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1879566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1889566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1899566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19195fce210SBarry Smith }
19295fce210SBarry Smith 
193cc4c1da9SBarry Smith /*@
194cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19529046d53SLisandro Dalcin 
19629046d53SLisandro Dalcin   Not Collective
19729046d53SLisandro Dalcin 
19829046d53SLisandro Dalcin   Input Parameter:
199cab54364SBarry Smith . sf - the `PetscSF` context
20029046d53SLisandro Dalcin 
20129046d53SLisandro Dalcin   Output Parameter:
202cab54364SBarry Smith . type - the `PetscSF` type name
20329046d53SLisandro Dalcin 
20429046d53SLisandro Dalcin   Level: intermediate
20529046d53SLisandro Dalcin 
20620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20729046d53SLisandro Dalcin @*/
208d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209d71ae5a4SJacob Faibussowitsch {
21029046d53SLisandro Dalcin   PetscFunctionBegin;
21129046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2124f572ea9SToby Isaac   PetscAssertPointer(type, 2);
21329046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21529046d53SLisandro Dalcin }
21629046d53SLisandro Dalcin 
2170764c050SBarry Smith /*@
21820662ed9SBarry Smith   PetscSFDestroy - destroy a star forest
21995fce210SBarry Smith 
22095fce210SBarry Smith   Collective
22195fce210SBarry Smith 
2224165533cSJose E. Roman   Input Parameter:
22395fce210SBarry Smith . sf - address of star forest
22495fce210SBarry Smith 
22595fce210SBarry Smith   Level: intermediate
22695fce210SBarry Smith 
22720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22895fce210SBarry Smith @*/
229d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
230d71ae5a4SJacob Faibussowitsch {
23195fce210SBarry Smith   PetscFunctionBegin;
2323ba16761SJacob Faibussowitsch   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233f4f49eeaSPierre Jolivet   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
234f4f49eeaSPierre Jolivet   if (--((PetscObject)*sf)->refct > 0) {
2359371c9d4SSatish Balay     *sf = NULL;
2363ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2379371c9d4SSatish Balay   }
2389566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
239f4f49eeaSPierre Jolivet   PetscTryTypeMethod(*sf, Destroy);
2409566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2419566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243715b587bSJunchao Zhang   if ((*sf)->use_stream_aware_mpi) {
244715b587bSJunchao Zhang     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245715b587bSJunchao Zhang     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246715b587bSJunchao Zhang   }
247715b587bSJunchao Zhang #endif
2489566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
2493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25095fce210SBarry Smith }
25195fce210SBarry Smith 
252d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253d71ae5a4SJacob Faibussowitsch {
254c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
255c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
256c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
257c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
258c4e6a40aSLawrence Mitchell 
259c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
2603ba16761SJacob Faibussowitsch   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2619566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
264c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
265c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
266c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26808401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26908401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270c4e6a40aSLawrence Mitchell   }
2713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
272c4e6a40aSLawrence Mitchell }
273c4e6a40aSLawrence Mitchell 
27495fce210SBarry Smith /*@
27520662ed9SBarry Smith   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27695fce210SBarry Smith 
27795fce210SBarry Smith   Collective
27895fce210SBarry Smith 
2794165533cSJose E. Roman   Input Parameter:
28095fce210SBarry Smith . sf - star forest communication object
28195fce210SBarry Smith 
28295fce210SBarry Smith   Level: beginner
28395fce210SBarry Smith 
28420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28595fce210SBarry Smith @*/
286d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
287d71ae5a4SJacob Faibussowitsch {
28895fce210SBarry Smith   PetscFunctionBegin;
28929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
29029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
2913ba16761SJacob Faibussowitsch   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2929566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2939566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2949566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
29620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29720c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
29871438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
29971438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
30020c24465SJunchao Zhang   }
30120c24465SJunchao Zhang #endif
30259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30359af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
30459af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
30559af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
30659af0bd3SScott Kruger   }
30759af0bd3SScott Kruger #endif
30820c24465SJunchao Zhang 
30920c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
31020c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31120c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31220c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31320c24465SJunchao Zhang   }
31420c24465SJunchao Zhang #endif
3159566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31695fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
3173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31895fce210SBarry Smith }
31995fce210SBarry Smith 
3208af6ec1cSBarry Smith /*@
321cab54364SBarry Smith   PetscSFSetFromOptions - set `PetscSF` options using the options database
32295fce210SBarry Smith 
32395fce210SBarry Smith   Logically Collective
32495fce210SBarry Smith 
3254165533cSJose E. Roman   Input Parameter:
32695fce210SBarry Smith . sf - star forest
32795fce210SBarry Smith 
32895fce210SBarry Smith   Options Database Keys:
+ -sf_type                      - implementation type, see `PetscSFSetType()`
. -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. -sf_unknown_input_stream      - Assume the input root/leafdata was computed on arbitrary streams unknown to `PetscSF` (default: false).
                                  When false, `PetscSF` assumes its callers computed the input root/leafdata with the default CUDA stream and
                                  also uses the default stream to process data, so no stream synchronization is needed between `PetscSF`
                                  and its caller; this only works with `-use_gpu_aware_mpi 1`.
. -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                                  If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
                                  On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                  the only available backend is kokkos.
34020c24465SJunchao Zhang 
34195fce210SBarry Smith   Level: intermediate
342cab54364SBarry Smith 
343cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34495fce210SBarry Smith @*/
345d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346d71ae5a4SJacob Faibussowitsch {
34795fce210SBarry Smith   PetscSFType deft;
34895fce210SBarry Smith   char        type[256];
34995fce210SBarry Smith   PetscBool   flg;
35095fce210SBarry Smith 
35195fce210SBarry Smith   PetscFunctionBegin;
35295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
353d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
35495fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3559566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3569566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3579566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
3587fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
35920c24465SJunchao Zhang   {
36020c24465SJunchao Zhang     char      backendstr[32] = {0};
36159af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36220c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
363d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3649566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3669566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3679566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3689566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
36959af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37020c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37120c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37259af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37328b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37420c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37508401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37620c24465SJunchao Zhang   #endif
377715b587bSJunchao Zhang 
378715b587bSJunchao Zhang   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379715b587bSJunchao Zhang     if (sf->use_stream_aware_mpi) {
380715b587bSJunchao Zhang       MPI_Info info;
381715b587bSJunchao Zhang 
382715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_create(&info));
383715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384715b587bSJunchao Zhang       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
387715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388715b587bSJunchao Zhang     }
389715b587bSJunchao Zhang   #endif
39020c24465SJunchao Zhang   }
391c2a741eeSJunchao Zhang #endif
392dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393d0609cedSBarry Smith   PetscOptionsEnd();
3943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39595fce210SBarry Smith }
39695fce210SBarry Smith 
39729046d53SLisandro Dalcin /*@
39895fce210SBarry Smith   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
39995fce210SBarry Smith 
40095fce210SBarry Smith   Logically Collective
40195fce210SBarry Smith 
4024165533cSJose E. Roman   Input Parameters:
40395fce210SBarry Smith + sf  - star forest
404cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
40595fce210SBarry Smith 
40695fce210SBarry Smith   Level: advanced
40795fce210SBarry Smith 
40820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
40995fce210SBarry Smith @*/
410d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411d71ae5a4SJacob Faibussowitsch {
41295fce210SBarry Smith   PetscFunctionBegin;
41395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41495fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
41528b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41695fce210SBarry Smith   sf->rankorder = flg;
4173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41895fce210SBarry Smith }
41995fce210SBarry Smith 
4205d83a8b1SBarry Smith /*@
42195fce210SBarry Smith   PetscSFSetGraph - Set a parallel star forest
42295fce210SBarry Smith 
42395fce210SBarry Smith   Collective
42495fce210SBarry Smith 
4254165533cSJose E. Roman   Input Parameters:
42695fce210SBarry Smith + sf         - star forest
42795fce210SBarry Smith . nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
42895fce210SBarry Smith . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
42920662ed9SBarry Smith . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
430c4e6a40aSLawrence Mitchell during setup in debug mode)
43120662ed9SBarry Smith . localmode  - copy mode for `ilocal`
432c4e6a40aSLawrence Mitchell . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
433c4e6a40aSLawrence Mitchell during setup in debug mode)
43420662ed9SBarry Smith - remotemode - copy mode for `iremote`
43595fce210SBarry Smith 
43695fce210SBarry Smith   Level: intermediate
43795fce210SBarry Smith 
43895452b02SPatrick Sanan   Notes:
43920662ed9SBarry Smith   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
44038ab3f8aSBarry Smith 
44120662ed9SBarry Smith   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44220662ed9SBarry Smith   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
443db2b9530SVaclav Hapla   PETSc might modify the respective array;
44420662ed9SBarry Smith   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
445cab54364SBarry Smith   Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
446db2b9530SVaclav Hapla 
44738b5cf2dSJacob Faibussowitsch   Fortran Notes:
44820662ed9SBarry Smith   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449c4e6a40aSLawrence Mitchell 
45038b5cf2dSJacob Faibussowitsch   Developer Notes:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also makes it easy to compare the leaf sets of two `PetscSF`s.
45372bf8598SVaclav Hapla 
45420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
45595fce210SBarry Smith @*/
456d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
457d71ae5a4SJacob Faibussowitsch {
458db2b9530SVaclav Hapla   PetscBool unique, contiguous;
45995fce210SBarry Smith 
46095fce210SBarry Smith   PetscFunctionBegin;
46195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4624f572ea9SToby Isaac   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4634f572ea9SToby Isaac   if (nleaves > 0) PetscAssertPointer(iremote, 6);
46408401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46508401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4668da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4678da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4688da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4698da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47029046d53SLisandro Dalcin 
4712a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4729566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4732a67d2daSStefano Zampini   }
4742a67d2daSStefano Zampini 
4759566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
47629046d53SLisandro Dalcin 
47795fce210SBarry Smith   sf->nroots  = nroots;
47895fce210SBarry Smith   sf->nleaves = nleaves;
47929046d53SLisandro Dalcin 
480db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
481db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
482db2b9530SVaclav Hapla 
4839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4849566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
485db2b9530SVaclav Hapla     ilocal = tlocal;
486db2b9530SVaclav Hapla   }
487db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
488db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
489db2b9530SVaclav Hapla 
4909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4919566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
492db2b9530SVaclav Hapla     iremote = tremote;
493db2b9530SVaclav Hapla   }
494db2b9530SVaclav Hapla 
49529046d53SLisandro Dalcin   if (nleaves && ilocal) {
496db2b9530SVaclav Hapla     PetscSFNode work;
497db2b9530SVaclav Hapla 
4989566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
4999566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
500db2b9530SVaclav Hapla     unique = PetscNot(unique);
501db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
502db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
503db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
504db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
50529046d53SLisandro Dalcin   } else {
50629046d53SLisandro Dalcin     sf->minleaf = 0;
50729046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
508db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
509db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
51029046d53SLisandro Dalcin   }
51129046d53SLisandro Dalcin 
512db2b9530SVaclav Hapla   if (contiguous) {
513db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
514db2b9530SVaclav Hapla       ilocal = NULL;
515db2b9530SVaclav Hapla     } else {
5169566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
517db2b9530SVaclav Hapla     }
518db2b9530SVaclav Hapla   }
519db2b9530SVaclav Hapla   sf->mine = ilocal;
520db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
52129046d53SLisandro Dalcin     sf->mine_alloc = NULL;
522db2b9530SVaclav Hapla   } else {
523db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
52495fce210SBarry Smith   }
525db2b9530SVaclav Hapla   sf->remote = iremote;
526db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
52729046d53SLisandro Dalcin     sf->remote_alloc = NULL;
528db2b9530SVaclav Hapla   } else {
529db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
53095fce210SBarry Smith   }
5319566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
53229046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
5333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
53495fce210SBarry Smith }
53595fce210SBarry Smith 
53629046d53SLisandro Dalcin /*@
537cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
538dd5b3ca6SJunchao Zhang 
539dd5b3ca6SJunchao Zhang   Collective
540dd5b3ca6SJunchao Zhang 
541dd5b3ca6SJunchao Zhang   Input Parameters:
542cab54364SBarry Smith + sf      - The `PetscSF`
543cab54364SBarry Smith . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
544cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
545cab54364SBarry Smith 
546cab54364SBarry Smith   Level: intermediate
547dd5b3ca6SJunchao Zhang 
548dd5b3ca6SJunchao Zhang   Notes:
54920662ed9SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
55020662ed9SBarry Smith   `n` and `N` are the local and global sizes of `x` respectively.
551dd5b3ca6SJunchao Zhang 
55220662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
55320662ed9SBarry Smith   sequential vectors `y` on all MPI processes.
554dd5b3ca6SJunchao Zhang 
55520662ed9SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
55620662ed9SBarry Smith   sequential vector `y` on rank 0.
557dd5b3ca6SJunchao Zhang 
55820662ed9SBarry Smith   In above cases, entries of `x` are roots and entries of `y` are leaves.
559dd5b3ca6SJunchao Zhang 
56020662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
561dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
562cab54364SBarry Smith   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
563dd5b3ca6SJunchao Zhang   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
564cab54364SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
565dd5b3ca6SJunchao Zhang 
566dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
567dd5b3ca6SJunchao Zhang 
568cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
569dd5b3ca6SJunchao Zhang  @*/
570d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
571d71ae5a4SJacob Faibussowitsch {
572dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
573dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
574dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
575dd5b3ca6SJunchao Zhang   PetscSFType type;
576dd5b3ca6SJunchao Zhang 
577dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5782abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5794f572ea9SToby Isaac   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5809566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5812c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5829566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
584dd5b3ca6SJunchao Zhang 
585dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
586dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
5879566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
5889566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
5899566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
5909566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
591dd5b3ca6SJunchao Zhang   } else {
5929566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
5939566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
594dd5b3ca6SJunchao Zhang     res[0] = n;
595dd5b3ca6SJunchao Zhang     res[1] = -n;
596dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
5971c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
598dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
599dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
600dd5b3ca6SJunchao Zhang     } else {
601dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
602dd5b3ca6SJunchao Zhang     }
6039566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
604dd5b3ca6SJunchao Zhang   }
6059566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
606dd5b3ca6SJunchao Zhang 
607dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
608dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
609dd5b3ca6SJunchao Zhang 
610dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
611dd5b3ca6SJunchao Zhang      Also set other easy stuff.
612dd5b3ca6SJunchao Zhang    */
613dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
614dd5b3ca6SJunchao Zhang     sf->nleaves = N;
615dd5b3ca6SJunchao Zhang     sf->nroots  = n;
616dd5b3ca6SJunchao Zhang     sf->nranks  = size;
617dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
618dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
619dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
620dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
621dd5b3ca6SJunchao Zhang     sf->nroots  = n;
622dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
623dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
624dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
625dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
626dd5b3ca6SJunchao Zhang     sf->nleaves = size;
627dd5b3ca6SJunchao Zhang     sf->nroots  = size;
628dd5b3ca6SJunchao Zhang     sf->nranks  = size;
629dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
630dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
631dd5b3ca6SJunchao Zhang   }
632dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
633dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
6343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
635dd5b3ca6SJunchao Zhang }
636dd5b3ca6SJunchao Zhang 
637dd5b3ca6SJunchao Zhang /*@
638cab54364SBarry Smith   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
63995fce210SBarry Smith 
64095fce210SBarry Smith   Collective
64195fce210SBarry Smith 
6424165533cSJose E. Roman   Input Parameter:
64395fce210SBarry Smith . sf - star forest to invert
64495fce210SBarry Smith 
6454165533cSJose E. Roman   Output Parameter:
64620662ed9SBarry Smith . isf - inverse of `sf`
6474165533cSJose E. Roman 
64895fce210SBarry Smith   Level: advanced
64995fce210SBarry Smith 
65095fce210SBarry Smith   Notes:
65195fce210SBarry Smith   All roots must have degree 1.
65295fce210SBarry Smith 
65395fce210SBarry Smith   The local space may be a permutation, but cannot be sparse.
65495fce210SBarry Smith 
65520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
65695fce210SBarry Smith @*/
657d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
658d71ae5a4SJacob Faibussowitsch {
65995fce210SBarry Smith   PetscMPIInt     rank;
66095fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
66195fce210SBarry Smith   const PetscInt *ilocal;
66295fce210SBarry Smith   PetscSFNode    *roots, *leaves;
66395fce210SBarry Smith 
66495fce210SBarry Smith   PetscFunctionBegin;
66529046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
66629046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
6674f572ea9SToby Isaac   PetscAssertPointer(isf, 2);
66829046d53SLisandro Dalcin 
6699566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
67029046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
67129046d53SLisandro Dalcin 
6729566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6739566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
674ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
67595fce210SBarry Smith     leaves[i].rank  = rank;
67695fce210SBarry Smith     leaves[i].index = i;
67795fce210SBarry Smith   }
67895fce210SBarry Smith   for (i = 0; i < nroots; i++) {
67995fce210SBarry Smith     roots[i].rank  = -1;
68095fce210SBarry Smith     roots[i].index = -1;
68195fce210SBarry Smith   }
6829566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
6839566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
68495fce210SBarry Smith 
68595fce210SBarry Smith   /* Check whether our leaves are sparse */
6869371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
6879371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
68895fce210SBarry Smith   if (count == nroots) newilocal = NULL;
6899371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
69095fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
69195fce210SBarry Smith       if (roots[i].rank >= 0) {
69295fce210SBarry Smith         newilocal[count]   = i;
69395fce210SBarry Smith         roots[count].rank  = roots[i].rank;
69495fce210SBarry Smith         roots[count].index = roots[i].index;
69595fce210SBarry Smith         count++;
69695fce210SBarry Smith       }
69795fce210SBarry Smith     }
69895fce210SBarry Smith   }
69995fce210SBarry Smith 
7009566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7019566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7029566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
7033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
70495fce210SBarry Smith }
70595fce210SBarry Smith 
70695fce210SBarry Smith /*@
707cab54364SBarry Smith   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
70895fce210SBarry Smith 
70995fce210SBarry Smith   Collective
71095fce210SBarry Smith 
7114165533cSJose E. Roman   Input Parameters:
71295fce210SBarry Smith + sf  - communication object to duplicate
713cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
71495fce210SBarry Smith 
7154165533cSJose E. Roman   Output Parameter:
71695fce210SBarry Smith . newsf - new communication object
71795fce210SBarry Smith 
71895fce210SBarry Smith   Level: beginner
71995fce210SBarry Smith 
72020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
72195fce210SBarry Smith @*/
722d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
723d71ae5a4SJacob Faibussowitsch {
72429046d53SLisandro Dalcin   PetscSFType  type;
72597929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
72695fce210SBarry Smith 
72795fce210SBarry Smith   PetscFunctionBegin;
72829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
72929046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
7304f572ea9SToby Isaac   PetscAssertPointer(newsf, 3);
7319566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7329566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7339566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
73435cb6cd3SPierre Jolivet   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
73595fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
736dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
737dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
73895fce210SBarry Smith       PetscInt           nroots, nleaves;
73995fce210SBarry Smith       const PetscInt    *ilocal;
74095fce210SBarry Smith       const PetscSFNode *iremote;
7419566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7429566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
743dd5b3ca6SJunchao Zhang     } else {
7449566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
745dd5b3ca6SJunchao Zhang     }
74695fce210SBarry Smith   }
74797929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7489566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
74997929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
75097929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
75197929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
75297929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
75397929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
75497929ea7SJunchao Zhang 
75520c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
75620c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
75771438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
75820c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
75920c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
76020c24465SJunchao Zhang #endif
761dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
76220c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
76495fce210SBarry Smith }
76595fce210SBarry Smith 
76695fce210SBarry Smith /*@C
76795fce210SBarry Smith   PetscSFGetGraph - Get the graph specifying a parallel star forest
76895fce210SBarry Smith 
76995fce210SBarry Smith   Not Collective
77095fce210SBarry Smith 
7714165533cSJose E. Roman   Input Parameter:
77295fce210SBarry Smith . sf - star forest
77395fce210SBarry Smith 
7744165533cSJose E. Roman   Output Parameters:
77595fce210SBarry Smith + nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
77695fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process
77720662ed9SBarry Smith . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
77895fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
77995fce210SBarry Smith 
780cab54364SBarry Smith   Level: intermediate
781cab54364SBarry Smith 
782373e0d91SLisandro Dalcin   Notes:
78320662ed9SBarry Smith   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
784373e0d91SLisandro Dalcin 
78520662ed9SBarry Smith   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
786db2b9530SVaclav Hapla 
7878dbb0df6SBarry Smith   Fortran Notes:
78820662ed9SBarry Smith   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
78920662ed9SBarry Smith   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
7908dbb0df6SBarry Smith 
79120662ed9SBarry Smith   To check for a `NULL` `ilocal` use
7928dbb0df6SBarry Smith $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
793ca797d7aSLawrence Mitchell 
79420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
79595fce210SBarry Smith @*/
796d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
797d71ae5a4SJacob Faibussowitsch {
79895fce210SBarry Smith   PetscFunctionBegin;
79995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
800b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
801f4f49eeaSPierre Jolivet     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
802b8dee149SJunchao Zhang   } else {
80395fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
80495fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
80595fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
80695fce210SBarry Smith     if (iremote) *iremote = sf->remote;
807b8dee149SJunchao Zhang   }
8083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
80995fce210SBarry Smith }
81095fce210SBarry Smith 
81129046d53SLisandro Dalcin /*@
81295fce210SBarry Smith   PetscSFGetLeafRange - Get the active leaf ranges
81395fce210SBarry Smith 
81495fce210SBarry Smith   Not Collective
81595fce210SBarry Smith 
8164165533cSJose E. Roman   Input Parameter:
81795fce210SBarry Smith . sf - star forest
81895fce210SBarry Smith 
8194165533cSJose E. Roman   Output Parameters:
82020662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
82120662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
82295fce210SBarry Smith 
82395fce210SBarry Smith   Level: developer
82495fce210SBarry Smith 
82520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
82695fce210SBarry Smith @*/
827d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
828d71ae5a4SJacob Faibussowitsch {
82995fce210SBarry Smith   PetscFunctionBegin;
83095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
83129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
83295fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
83395fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
8343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
83595fce210SBarry Smith }
83695fce210SBarry Smith 
837ffeef943SBarry Smith /*@
838cab54364SBarry Smith   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
839fe2efc57SMark 
84020f4b53cSBarry Smith   Collective
841fe2efc57SMark 
842fe2efc57SMark   Input Parameters:
843fe2efc57SMark + A    - the star forest
844cab54364SBarry Smith . obj  - Optional object that provides the prefix for the option names
845736c3998SJose E. Roman - name - command line option
846fe2efc57SMark 
847fe2efc57SMark   Level: intermediate
848cab54364SBarry Smith 
84920662ed9SBarry Smith   Note:
85020662ed9SBarry Smith   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
85120662ed9SBarry Smith 
852db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
853fe2efc57SMark @*/
854d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
855d71ae5a4SJacob Faibussowitsch {
856fe2efc57SMark   PetscFunctionBegin;
857fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8589566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
860fe2efc57SMark }
861fe2efc57SMark 
862ffeef943SBarry Smith /*@
86395fce210SBarry Smith   PetscSFView - view a star forest
86495fce210SBarry Smith 
86595fce210SBarry Smith   Collective
86695fce210SBarry Smith 
8674165533cSJose E. Roman   Input Parameters:
86895fce210SBarry Smith + sf     - star forest
869cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
87095fce210SBarry Smith 
87195fce210SBarry Smith   Level: beginner
87295fce210SBarry Smith 
873cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
87495fce210SBarry Smith @*/
875d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
876d71ae5a4SJacob Faibussowitsch {
87795fce210SBarry Smith   PetscBool         iascii;
87895fce210SBarry Smith   PetscViewerFormat format;
87995fce210SBarry Smith 
88095fce210SBarry Smith   PetscFunctionBegin;
88195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8829566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
88395fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
88495fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
8859566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8869566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
88753dd6d7dSJunchao Zhang   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
88895fce210SBarry Smith     PetscMPIInt rank;
88981bfa7aaSJed Brown     PetscInt    ii, i, j;
89095fce210SBarry Smith 
8919566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
8929566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
893dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
89480153354SVaclav Hapla       if (!sf->graphset) {
8959566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
8969566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
8973ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
89880153354SVaclav Hapla       }
8999566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9009566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9019566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
90248a46eb9SPierre Jolivet       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
9039566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9049566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
90595fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
90681bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
9079566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9089566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
90981bfa7aaSJed Brown         for (i = 0; i < sf->nranks; i++) perm[i] = i;
9109566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9119566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
91281bfa7aaSJed Brown         for (ii = 0; ii < sf->nranks; ii++) {
91381bfa7aaSJed Brown           i = perm[ii];
9149566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
91548a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
91695fce210SBarry Smith         }
9179566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
91895fce210SBarry Smith       }
9199566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9209566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
921dd5b3ca6SJunchao Zhang     }
9229566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
92395fce210SBarry Smith   }
924dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
9253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
92695fce210SBarry Smith }
92795fce210SBarry Smith 
92895fce210SBarry Smith /*@C
929dec1416fSJunchao Zhang   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
93095fce210SBarry Smith 
93195fce210SBarry Smith   Not Collective
93295fce210SBarry Smith 
9334165533cSJose E. Roman   Input Parameter:
93495fce210SBarry Smith . sf - star forest
93595fce210SBarry Smith 
9364165533cSJose E. Roman   Output Parameters:
93795fce210SBarry Smith + nranks  - number of ranks referenced by local part
93820662ed9SBarry Smith . ranks   - [`nranks`] array of ranks
93920662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
94020662ed9SBarry Smith . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
94120662ed9SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
94295fce210SBarry Smith 
94395fce210SBarry Smith   Level: developer
94495fce210SBarry Smith 
945cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
94695fce210SBarry Smith @*/
947d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
948d71ae5a4SJacob Faibussowitsch {
94995fce210SBarry Smith   PetscFunctionBegin;
95095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
95128b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
952dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9539927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
954dec1416fSJunchao Zhang   } else {
955dec1416fSJunchao Zhang     /* The generic implementation */
95695fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
95795fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
95895fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
95995fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
96095fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
961dec1416fSJunchao Zhang   }
9623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
96395fce210SBarry Smith }
96495fce210SBarry Smith 
9658750ddebSJunchao Zhang /*@C
9668750ddebSJunchao Zhang   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9678750ddebSJunchao Zhang 
9688750ddebSJunchao Zhang   Not Collective
9698750ddebSJunchao Zhang 
9704165533cSJose E. Roman   Input Parameter:
9718750ddebSJunchao Zhang . sf - star forest
9728750ddebSJunchao Zhang 
9734165533cSJose E. Roman   Output Parameters:
9748750ddebSJunchao Zhang + niranks  - number of leaf ranks referencing roots on this process
97520662ed9SBarry Smith . iranks   - [`niranks`] array of ranks
97620662ed9SBarry Smith . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
97720662ed9SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
9788750ddebSJunchao Zhang 
9798750ddebSJunchao Zhang   Level: developer
9808750ddebSJunchao Zhang 
981cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
9828750ddebSJunchao Zhang @*/
983d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
984d71ae5a4SJacob Faibussowitsch {
9858750ddebSJunchao Zhang   PetscFunctionBegin;
9868750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
98728b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
9888750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
9899927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
9908750ddebSJunchao Zhang   } else {
9918750ddebSJunchao Zhang     PetscSFType type;
9929566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
99398921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
9948750ddebSJunchao Zhang   }
9953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9968750ddebSJunchao Zhang }
9978750ddebSJunchao Zhang 
998d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
999d71ae5a4SJacob Faibussowitsch {
1000b5a8e515SJed Brown   PetscInt i;
1001b5a8e515SJed Brown   for (i = 0; i < n; i++) {
1002b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
1003b5a8e515SJed Brown   }
1004b5a8e515SJed Brown   return PETSC_FALSE;
1005b5a8e515SJed Brown }
1006b5a8e515SJed Brown 
100795fce210SBarry Smith /*@C
1008cab54364SBarry Smith   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
100921c688dcSJed Brown 
101021c688dcSJed Brown   Collective
101121c688dcSJed Brown 
10124165533cSJose E. Roman   Input Parameters:
1013cab54364SBarry Smith + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1014cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
101521c688dcSJed Brown 
101621c688dcSJed Brown   Level: developer
101721c688dcSJed Brown 
1018cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
101921c688dcSJed Brown @*/
1020d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1021d71ae5a4SJacob Faibussowitsch {
1022eec179cfSJacob Faibussowitsch   PetscHMapI    table;
1023eec179cfSJacob Faibussowitsch   PetscHashIter pos;
1024b5a8e515SJed Brown   PetscMPIInt   size, groupsize, *groupranks;
1025247e8311SStefano Zampini   PetscInt     *rcount, *ranks;
1026247e8311SStefano Zampini   PetscInt      i, irank = -1, orank = -1;
102721c688dcSJed Brown 
102821c688dcSJed Brown   PetscFunctionBegin;
102921c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
103029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10319566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1032eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(10, &table));
103321c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
103421c688dcSJed Brown     /* Log 1-based rank */
1035eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
103621c688dcSJed Brown   }
1037eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIGetSize(table, &sf->nranks));
10389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1040eec179cfSJacob Faibussowitsch   PetscHashIterBegin(table, pos);
104121c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
1042eec179cfSJacob Faibussowitsch     PetscHashIterGetKey(table, pos, ranks[i]);
1043eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(table, pos, rcount[i]);
1044eec179cfSJacob Faibussowitsch     PetscHashIterNext(table, pos);
104521c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
104621c688dcSJed Brown   }
1047eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&table));
1048b5a8e515SJed Brown 
1049b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1050b5a8e515SJed Brown   {
10517fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1052b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10539566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10549566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10559566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10569566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1057b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10589566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10599566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10609566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1061b5a8e515SJed Brown   }
1062b5a8e515SJed Brown 
1063b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1064b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1065b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1066b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1067b5a8e515SJed Brown     }
1068b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1069b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1070b5a8e515SJed Brown     }
1071b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1072b5a8e515SJed Brown       PetscInt tmprank, tmpcount;
1073247e8311SStefano Zampini 
1074b5a8e515SJed Brown       tmprank             = ranks[i];
1075b5a8e515SJed Brown       tmpcount            = rcount[i];
1076b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1077b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1078b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1079b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1080b5a8e515SJed Brown       sf->ndranks++;
1081b5a8e515SJed Brown     }
1082b5a8e515SJed Brown   }
10839566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
10849566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
10855c0db29aSPierre Jolivet   if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
108621c688dcSJed Brown   sf->roffset[0] = 0;
108721c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10889566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
108921c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
109021c688dcSJed Brown     rcount[i]          = 0;
109121c688dcSJed Brown   }
1092247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1093247e8311SStefano Zampini     /* short circuit */
1094247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
109521c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
10969566063dSJacob Faibussowitsch       PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1097b5a8e515SJed Brown       if (irank < 0) {
10989566063dSJacob Faibussowitsch         PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1099b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
110021c688dcSJed Brown       }
1101247e8311SStefano Zampini       orank = sf->remote[i].rank;
1102247e8311SStefano Zampini     }
110308401ef6SPierre Jolivet     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
110421c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
110521c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
110621c688dcSJed Brown     rcount[irank]++;
110721c688dcSJed Brown   }
11089566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
11093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
111021c688dcSJed Brown }
111121c688dcSJed Brown 
111221c688dcSJed Brown /*@C
111395fce210SBarry Smith   PetscSFGetGroups - gets incoming and outgoing process groups
111495fce210SBarry Smith 
111595fce210SBarry Smith   Collective
111695fce210SBarry Smith 
11174165533cSJose E. Roman   Input Parameter:
111895fce210SBarry Smith . sf - star forest
111995fce210SBarry Smith 
11204165533cSJose E. Roman   Output Parameters:
112195fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
112295fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
112395fce210SBarry Smith 
112495fce210SBarry Smith   Level: developer
112595fce210SBarry Smith 
1126cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
112795fce210SBarry Smith @*/
1128d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1129d71ae5a4SJacob Faibussowitsch {
11307fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
113195fce210SBarry Smith 
113295fce210SBarry Smith   PetscFunctionBegin;
113308401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
113495fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
113595fce210SBarry Smith     PetscInt        i;
113695fce210SBarry Smith     const PetscInt *indegree;
113795fce210SBarry Smith     PetscMPIInt     rank, *outranks, *inranks;
113895fce210SBarry Smith     PetscSFNode    *remote;
113995fce210SBarry Smith     PetscSF         bgcount;
114095fce210SBarry Smith 
114195fce210SBarry Smith     /* Compute the number of incoming ranks */
11429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
114395fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
114495fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
114595fce210SBarry Smith       remote[i].index = 0;
114695fce210SBarry Smith     }
11479566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11489566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11499566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11509566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
115195fce210SBarry Smith     /* Enumerate the incoming ranks */
11529566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11539566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
115495fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11559566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11569566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11579566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11589566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
11599566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11609566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11619566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
116295fce210SBarry Smith   }
116395fce210SBarry Smith   *incoming = sf->ingroup;
116495fce210SBarry Smith 
116595fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11669566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11679566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11689566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
116995fce210SBarry Smith   }
117095fce210SBarry Smith   *outgoing = sf->outgroup;
11713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
117295fce210SBarry Smith }
117395fce210SBarry Smith 
117429046d53SLisandro Dalcin /*@
11750dd791a8SStefano Zampini   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11760dd791a8SStefano Zampini 
11770dd791a8SStefano Zampini   Collective
11780dd791a8SStefano Zampini 
11790dd791a8SStefano Zampini   Input Parameter:
11800dd791a8SStefano Zampini . sf - star forest
11810dd791a8SStefano Zampini 
11820dd791a8SStefano Zampini   Output Parameter:
11830dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications
11840dd791a8SStefano Zampini 
11850dd791a8SStefano Zampini   Level: developer
11860dd791a8SStefano Zampini 
11870dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
11880dd791a8SStefano Zampini @*/
11890dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
11900dd791a8SStefano Zampini {
11910dd791a8SStefano Zampini   PetscFunctionBegin;
11920dd791a8SStefano Zampini   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
11930dd791a8SStefano Zampini   PetscAssertPointer(rsf, 2);
11940dd791a8SStefano Zampini   if (!sf->rankssf) {
11950dd791a8SStefano Zampini     PetscSFNode       *rremotes;
11960dd791a8SStefano Zampini     const PetscMPIInt *ranks;
11970dd791a8SStefano Zampini     PetscInt           nranks;
11980dd791a8SStefano Zampini 
11990dd791a8SStefano Zampini     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12000dd791a8SStefano Zampini     PetscCall(PetscMalloc1(nranks, &rremotes));
12010dd791a8SStefano Zampini     for (PetscInt i = 0; i < nranks; i++) {
12020dd791a8SStefano Zampini       rremotes[i].rank  = ranks[i];
12030dd791a8SStefano Zampini       rremotes[i].index = 0;
12040dd791a8SStefano Zampini     }
12050dd791a8SStefano Zampini     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12060dd791a8SStefano Zampini     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12070dd791a8SStefano Zampini   }
12080dd791a8SStefano Zampini   *rsf = sf->rankssf;
12090dd791a8SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
12100dd791a8SStefano Zampini }
12110dd791a8SStefano Zampini 
12120dd791a8SStefano Zampini /*@
1213cab54364SBarry Smith   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
121495fce210SBarry Smith 
121595fce210SBarry Smith   Collective
121695fce210SBarry Smith 
12174165533cSJose E. Roman   Input Parameter:
121895fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
121995fce210SBarry Smith 
12204165533cSJose E. Roman   Output Parameter:
122195fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1
122295fce210SBarry Smith 
122395fce210SBarry Smith   Level: developer
122495fce210SBarry Smith 
1225cab54364SBarry Smith   Note:
1226cab54364SBarry Smith   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
122795fce210SBarry Smith   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
122895fce210SBarry Smith   edge, it is a candidate for future optimization that might involve its removal.
122995fce210SBarry Smith 
1230cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
123195fce210SBarry Smith @*/
1232d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1233d71ae5a4SJacob Faibussowitsch {
123495fce210SBarry Smith   PetscFunctionBegin;
123595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12364f572ea9SToby Isaac   PetscAssertPointer(multi, 2);
123795fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12389566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
123995fce210SBarry Smith     *multi           = sf->multi;
1240013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12413ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
124295fce210SBarry Smith   }
124395fce210SBarry Smith   if (!sf->multi) {
124495fce210SBarry Smith     const PetscInt *indegree;
12459837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
124695fce210SBarry Smith     PetscSFNode    *remote;
124729046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12489566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12499566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12509566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
125195fce210SBarry Smith     inoffset[0] = 0;
125295fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12539837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
12549566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12559566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
125695fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
125776bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1258ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
125976bd3646SJed Brown     }
12609566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
126195fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
126295fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
126338e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
126495fce210SBarry Smith     }
12659566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1266013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12679566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
126895fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
126995fce210SBarry Smith       PetscMPIInt  rank;
127095fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
127195fce210SBarry Smith       PetscSFNode *newremote;
12729566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
127395fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12749566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12759837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12769566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12779566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
127895fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
127995fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
128095fce210SBarry Smith         PetscInt j;
128195fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
12828e3a54c0SPierre Jolivet         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
128395fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
128495fce210SBarry Smith       }
12859566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
12869566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
12879566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
128895fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
128995fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
129001365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
129195fce210SBarry Smith       }
12929566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
12939566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
129495fce210SBarry Smith     }
12959566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
129695fce210SBarry Smith   }
129795fce210SBarry Smith   *multi = sf->multi;
12983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
129995fce210SBarry Smith }
130095fce210SBarry Smith 
130195fce210SBarry Smith /*@C
130220662ed9SBarry Smith   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
130395fce210SBarry Smith 
130495fce210SBarry Smith   Collective
130595fce210SBarry Smith 
13064165533cSJose E. Roman   Input Parameters:
130795fce210SBarry Smith + sf        - original star forest
1308ba2a7774SJunchao Zhang . nselected - number of selected roots on this process
1309ba2a7774SJunchao Zhang - selected  - indices of the selected roots on this process
131095fce210SBarry Smith 
13114165533cSJose E. Roman   Output Parameter:
1312cd620004SJunchao Zhang . esf - new star forest
131395fce210SBarry Smith 
131495fce210SBarry Smith   Level: advanced
131595fce210SBarry Smith 
131695fce210SBarry Smith   Note:
1317cab54364SBarry Smith   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
131895fce210SBarry Smith   be done by calling PetscSFGetGraph().
131995fce210SBarry Smith 
1320cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
132195fce210SBarry Smith @*/
1322d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1323d71ae5a4SJacob Faibussowitsch {
1324cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1325cd620004SJunchao Zhang   const PetscInt    *ilocal;
1326cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1327ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1328f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1329f659e5c7SJunchao Zhang   MPI_Comm           comm;
133095fce210SBarry Smith 
133195fce210SBarry Smith   PetscFunctionBegin;
133295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
133329046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
13344f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
13354f572ea9SToby Isaac   PetscAssertPointer(esf, 4);
13360511a646SMatthew G. Knepley 
13379566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
13389566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13399566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13409566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1341cd620004SJunchao Zhang 
134276bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1343cd620004SJunchao Zhang     PetscBool dups;
13449566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
134528b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1346511e6246SStefano Zampini     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1347cd620004SJunchao Zhang   }
1348f659e5c7SJunchao Zhang 
1349dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1350dbbe0bcdSBarry Smith   else {
1351cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
13529566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1353cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
13549566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13558e3a54c0SPierre Jolivet     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1356cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1357cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13589566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13599566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1360ba2a7774SJunchao Zhang 
1361cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1362cd620004SJunchao Zhang     esf_nleaves = 0;
1363cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1364cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1365cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1366cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1367cd620004SJunchao Zhang       */
1368cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1369cd620004SJunchao Zhang     }
13709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1372cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1373cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1374cd620004SJunchao Zhang       if (leafdata[j]) {
1375cd620004SJunchao Zhang         new_ilocal[n]        = j;
1376cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1377cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1378fc1ede2bSMatthew G. Knepley         ++n;
137995fce210SBarry Smith       }
138095fce210SBarry Smith     }
13819566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
13829566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
13839566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
13849566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1385f659e5c7SJunchao Zhang   }
13869566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
13873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
138895fce210SBarry Smith }
138995fce210SBarry Smith 
13902f5fb4c2SMatthew G. Knepley /*@C
139120662ed9SBarry Smith   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
13922f5fb4c2SMatthew G. Knepley 
13932f5fb4c2SMatthew G. Knepley   Collective
13942f5fb4c2SMatthew G. Knepley 
13954165533cSJose E. Roman   Input Parameters:
13962f5fb4c2SMatthew G. Knepley + sf        - original star forest
1397f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process
1398f659e5c7SJunchao Zhang - selected  - indices of the selected leaves on this process
13992f5fb4c2SMatthew G. Knepley 
14004165533cSJose E. Roman   Output Parameter:
14012f5fb4c2SMatthew G. Knepley . newsf - new star forest
14022f5fb4c2SMatthew G. Knepley 
14032f5fb4c2SMatthew G. Knepley   Level: advanced
14042f5fb4c2SMatthew G. Knepley 
1405cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14062f5fb4c2SMatthew G. Knepley @*/
1407d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1408d71ae5a4SJacob Faibussowitsch {
1409f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1410f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1411f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1412f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1413f659e5c7SJunchao Zhang   MPI_Comm           comm;
14142f5fb4c2SMatthew G. Knepley 
14152f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
14162f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
141729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
14184f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
14194f572ea9SToby Isaac   PetscAssertPointer(newsf, 4);
14202f5fb4c2SMatthew G. Knepley 
1421f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
14229566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
14249566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
14259566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
142608401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1427f659e5c7SJunchao Zhang 
1428f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1429dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1430dbbe0bcdSBarry Smith   else {
14319566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14329566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
14339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1434f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1435f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1436f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1437f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1438f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
14392f5fb4c2SMatthew G. Knepley     }
14409566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14419566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1442f659e5c7SJunchao Zhang   }
14439566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
14443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14452f5fb4c2SMatthew G. Knepley }
14462f5fb4c2SMatthew G. Knepley 
144795fce210SBarry Smith /*@C
1448cab54364SBarry Smith   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14493482bfa8SJunchao Zhang 
1450c3339decSBarry Smith   Collective
14513482bfa8SJunchao Zhang 
14524165533cSJose E. Roman   Input Parameters:
14533482bfa8SJunchao Zhang + sf       - star forest on which to communicate
14543482bfa8SJunchao Zhang . unit     - data type associated with each node
14553482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14563482bfa8SJunchao Zhang - op       - operation to use for reduction
14573482bfa8SJunchao Zhang 
14584165533cSJose E. Roman   Output Parameter:
14593482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14603482bfa8SJunchao Zhang 
14613482bfa8SJunchao Zhang   Level: intermediate
14623482bfa8SJunchao Zhang 
146320662ed9SBarry Smith   Note:
146420662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1465da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1466cab54364SBarry Smith   use `PetscSFBcastWithMemTypeBegin()` instead.
1467cab54364SBarry Smith 
1468cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14693482bfa8SJunchao Zhang @*/
1470d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1471d71ae5a4SJacob Faibussowitsch {
1472eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14733482bfa8SJunchao Zhang 
14743482bfa8SJunchao Zhang   PetscFunctionBegin;
14753482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14769566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14779566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14789566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
14799566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1480dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14819566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
14823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14833482bfa8SJunchao Zhang }
14843482bfa8SJunchao Zhang 
14853482bfa8SJunchao Zhang /*@C
148620662ed9SBarry Smith   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
148720662ed9SBarry Smith   to `PetscSFBcastEnd()`
1488d0295fc0SJunchao Zhang 
1489c3339decSBarry Smith   Collective
1490d0295fc0SJunchao Zhang 
14914165533cSJose E. Roman   Input Parameters:
1492d0295fc0SJunchao Zhang + sf        - star forest on which to communicate
1493d0295fc0SJunchao Zhang . unit      - data type associated with each node
1494d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1495d0295fc0SJunchao Zhang . rootdata  - buffer to broadcast
1496d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1497d0295fc0SJunchao Zhang - op        - operation to use for reduction
1498d0295fc0SJunchao Zhang 
14994165533cSJose E. Roman   Output Parameter:
1500d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1501d0295fc0SJunchao Zhang 
1502d0295fc0SJunchao Zhang   Level: intermediate
1503d0295fc0SJunchao Zhang 
1504cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1505d0295fc0SJunchao Zhang @*/
1506d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1507d71ae5a4SJacob Faibussowitsch {
1508d0295fc0SJunchao Zhang   PetscFunctionBegin;
1509d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15109566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15119566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1512dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15139566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1515d0295fc0SJunchao Zhang }
1516d0295fc0SJunchao Zhang 
1517d0295fc0SJunchao Zhang /*@C
151820662ed9SBarry Smith   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15193482bfa8SJunchao Zhang 
15203482bfa8SJunchao Zhang   Collective
15213482bfa8SJunchao Zhang 
15224165533cSJose E. Roman   Input Parameters:
15233482bfa8SJunchao Zhang + sf       - star forest
15243482bfa8SJunchao Zhang . unit     - data type
15253482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15263482bfa8SJunchao Zhang - op       - operation to use for reduction
15273482bfa8SJunchao Zhang 
15284165533cSJose E. Roman   Output Parameter:
15293482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15303482bfa8SJunchao Zhang 
15313482bfa8SJunchao Zhang   Level: intermediate
15323482bfa8SJunchao Zhang 
1533cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15343482bfa8SJunchao Zhang @*/
1535d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1536d71ae5a4SJacob Faibussowitsch {
15373482bfa8SJunchao Zhang   PetscFunctionBegin;
15383482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15399566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1540dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15419566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15433482bfa8SJunchao Zhang }
15443482bfa8SJunchao Zhang 
15453482bfa8SJunchao Zhang /*@C
1546cab54364SBarry Smith   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
154795fce210SBarry Smith 
154895fce210SBarry Smith   Collective
154995fce210SBarry Smith 
15504165533cSJose E. Roman   Input Parameters:
155195fce210SBarry Smith + sf       - star forest
155295fce210SBarry Smith . unit     - data type
155395fce210SBarry Smith . leafdata - values to reduce
155495fce210SBarry Smith - op       - reduction operation
155595fce210SBarry Smith 
15564165533cSJose E. Roman   Output Parameter:
155795fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
155895fce210SBarry Smith 
155995fce210SBarry Smith   Level: intermediate
156095fce210SBarry Smith 
156120662ed9SBarry Smith   Note:
156220662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1563da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1564cab54364SBarry Smith   use `PetscSFReduceWithMemTypeBegin()` instead.
1565d0295fc0SJunchao Zhang 
156620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
156795fce210SBarry Smith @*/
1568d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1569d71ae5a4SJacob Faibussowitsch {
1570eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
157195fce210SBarry Smith 
157295fce210SBarry Smith   PetscFunctionBegin;
157395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15749566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15759566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15769566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15779566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1578f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15799566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
158195fce210SBarry Smith }
158295fce210SBarry Smith 
158395fce210SBarry Smith /*@C
1584cab54364SBarry Smith   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1585d0295fc0SJunchao Zhang 
1586d0295fc0SJunchao Zhang   Collective
1587d0295fc0SJunchao Zhang 
15884165533cSJose E. Roman   Input Parameters:
1589d0295fc0SJunchao Zhang + sf        - star forest
1590d0295fc0SJunchao Zhang . unit      - data type
1591d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1592d0295fc0SJunchao Zhang . leafdata  - values to reduce
1593d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1594d0295fc0SJunchao Zhang - op        - reduction operation
1595d0295fc0SJunchao Zhang 
15964165533cSJose E. Roman   Output Parameter:
1597d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1598d0295fc0SJunchao Zhang 
1599d0295fc0SJunchao Zhang   Level: intermediate
1600d0295fc0SJunchao Zhang 
160120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1602d0295fc0SJunchao Zhang @*/
1603d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1604d71ae5a4SJacob Faibussowitsch {
1605d0295fc0SJunchao Zhang   PetscFunctionBegin;
1606d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16079566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16089566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1609f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16109566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1612d0295fc0SJunchao Zhang }
1613d0295fc0SJunchao Zhang 
1614d0295fc0SJunchao Zhang /*@C
161520662ed9SBarry Smith   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
161695fce210SBarry Smith 
161795fce210SBarry Smith   Collective
161895fce210SBarry Smith 
16194165533cSJose E. Roman   Input Parameters:
162095fce210SBarry Smith + sf       - star forest
162195fce210SBarry Smith . unit     - data type
162295fce210SBarry Smith . leafdata - values to reduce
162395fce210SBarry Smith - op       - reduction operation
162495fce210SBarry Smith 
16254165533cSJose E. Roman   Output Parameter:
162695fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
162795fce210SBarry Smith 
162895fce210SBarry Smith   Level: intermediate
162995fce210SBarry Smith 
163020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
163195fce210SBarry Smith @*/
1632d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1633d71ae5a4SJacob Faibussowitsch {
163495fce210SBarry Smith   PetscFunctionBegin;
163595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16369566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1637dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16389566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
164095fce210SBarry Smith }
164195fce210SBarry Smith 
164295fce210SBarry Smith /*@C
1643cab54364SBarry Smith   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1644cab54364SBarry Smith   to be completed with `PetscSFFetchAndOpEnd()`
1645a1729e3fSJunchao Zhang 
1646a1729e3fSJunchao Zhang   Collective
1647a1729e3fSJunchao Zhang 
16484165533cSJose E. Roman   Input Parameters:
1649a1729e3fSJunchao Zhang + sf       - star forest
1650a1729e3fSJunchao Zhang . unit     - data type
1651a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1652a1729e3fSJunchao Zhang - op       - operation to use for reduction
1653a1729e3fSJunchao Zhang 
16544165533cSJose E. Roman   Output Parameters:
1655a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1656a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1657a1729e3fSJunchao Zhang 
1658a1729e3fSJunchao Zhang   Level: advanced
1659a1729e3fSJunchao Zhang 
1660a1729e3fSJunchao Zhang   Note:
1661a1729e3fSJunchao Zhang   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1662a1729e3fSJunchao Zhang   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1663a1729e3fSJunchao Zhang   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1664a1729e3fSJunchao Zhang   integers.
1665a1729e3fSJunchao Zhang 
1666cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1667a1729e3fSJunchao Zhang @*/
1668d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1669d71ae5a4SJacob Faibussowitsch {
1670eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1671a1729e3fSJunchao Zhang 
1672a1729e3fSJunchao Zhang   PetscFunctionBegin;
1673a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16749566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16759566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16769566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16779566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
16789566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
167908401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1680dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16819566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1683a1729e3fSJunchao Zhang }
1684a1729e3fSJunchao Zhang 
1685a1729e3fSJunchao Zhang /*@C
1686cab54364SBarry Smith   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1687cab54364SBarry Smith   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1688d3b3e55cSJunchao Zhang 
1689d3b3e55cSJunchao Zhang   Collective
1690d3b3e55cSJunchao Zhang 
1691d3b3e55cSJunchao Zhang   Input Parameters:
1692d3b3e55cSJunchao Zhang + sf              - star forest
1693d3b3e55cSJunchao Zhang . unit            - data type
1694d3b3e55cSJunchao Zhang . rootmtype       - memory type of rootdata
1695d3b3e55cSJunchao Zhang . leafmtype       - memory type of leafdata
1696d3b3e55cSJunchao Zhang . leafdata        - leaf values to use in reduction
1697d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate
1698d3b3e55cSJunchao Zhang - op              - operation to use for reduction
1699d3b3e55cSJunchao Zhang 
1700d3b3e55cSJunchao Zhang   Output Parameters:
1701d3b3e55cSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1702d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1703d3b3e55cSJunchao Zhang 
1704d3b3e55cSJunchao Zhang   Level: advanced
1705d3b3e55cSJunchao Zhang 
1706cab54364SBarry Smith   Note:
1707cab54364SBarry Smith   See `PetscSFFetchAndOpBegin()` for more details.
1708d3b3e55cSJunchao Zhang 
170920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1710d3b3e55cSJunchao Zhang @*/
1711d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1712d71ae5a4SJacob Faibussowitsch {
1713d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1714d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17159566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
17169566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
171708401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1718dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17199566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1721d3b3e55cSJunchao Zhang }
1722d3b3e55cSJunchao Zhang 
1723d3b3e55cSJunchao Zhang /*@C
172420662ed9SBarry Smith   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
172520662ed9SBarry Smith   to fetch values from roots and update atomically by applying operation using my leaf value
1726a1729e3fSJunchao Zhang 
1727a1729e3fSJunchao Zhang   Collective
1728a1729e3fSJunchao Zhang 
17294165533cSJose E. Roman   Input Parameters:
1730a1729e3fSJunchao Zhang + sf       - star forest
1731a1729e3fSJunchao Zhang . unit     - data type
1732a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1733a1729e3fSJunchao Zhang - op       - operation to use for reduction
1734a1729e3fSJunchao Zhang 
17354165533cSJose E. Roman   Output Parameters:
1736a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1737a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1738a1729e3fSJunchao Zhang 
1739a1729e3fSJunchao Zhang   Level: advanced
1740a1729e3fSJunchao Zhang 
174120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1742a1729e3fSJunchao Zhang @*/
1743d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1744d71ae5a4SJacob Faibussowitsch {
1745a1729e3fSJunchao Zhang   PetscFunctionBegin;
1746a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17479566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1748dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17499566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1751a1729e3fSJunchao Zhang }
1752a1729e3fSJunchao Zhang 
1753a1729e3fSJunchao Zhang /*@C
1754cab54364SBarry Smith   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
175595fce210SBarry Smith 
175695fce210SBarry Smith   Collective
175795fce210SBarry Smith 
17584165533cSJose E. Roman   Input Parameter:
175995fce210SBarry Smith . sf - star forest
176095fce210SBarry Smith 
17614165533cSJose E. Roman   Output Parameter:
176295fce210SBarry Smith . degree - degree of each root vertex
176395fce210SBarry Smith 
176495fce210SBarry Smith   Level: advanced
176595fce210SBarry Smith 
1766cab54364SBarry Smith   Note:
176720662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1768ffe67aa5SVáclav Hapla 
1769cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
177095fce210SBarry Smith @*/
1771d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1772d71ae5a4SJacob Faibussowitsch {
177395fce210SBarry Smith   PetscFunctionBegin;
177495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
177595fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
17764f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
1777803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
17785b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
177928b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
17805b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
17819566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
17829566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
178329046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
17849837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
17859566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
178695fce210SBarry Smith   }
178795fce210SBarry Smith   *degree = NULL;
17883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
178995fce210SBarry Smith }
179095fce210SBarry Smith 
179195fce210SBarry Smith /*@C
1792cab54364SBarry Smith   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
179395fce210SBarry Smith 
179495fce210SBarry Smith   Collective
179595fce210SBarry Smith 
17964165533cSJose E. Roman   Input Parameter:
179795fce210SBarry Smith . sf - star forest
179895fce210SBarry Smith 
17994165533cSJose E. Roman   Output Parameter:
180095fce210SBarry Smith . degree - degree of each root vertex
180195fce210SBarry Smith 
180295fce210SBarry Smith   Level: developer
180395fce210SBarry Smith 
1804cab54364SBarry Smith   Note:
180520662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1806ffe67aa5SVáclav Hapla 
1807cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
180895fce210SBarry Smith @*/
1809d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1810d71ae5a4SJacob Faibussowitsch {
181195fce210SBarry Smith   PetscFunctionBegin;
181295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
181395fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
18144f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
181595fce210SBarry Smith   if (!sf->degreeknown) {
181628b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18179566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18189566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
181995fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
182095fce210SBarry Smith   }
182195fce210SBarry Smith   *degree = sf->degree;
18223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
182395fce210SBarry Smith }
182495fce210SBarry Smith 
1825673100f5SVaclav Hapla /*@C
182620662ed9SBarry Smith   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
182766dfcd1aSVaclav Hapla   Each multi-root is assigned index of the corresponding original root.
1828673100f5SVaclav Hapla 
1829673100f5SVaclav Hapla   Collective
1830673100f5SVaclav Hapla 
18314165533cSJose E. Roman   Input Parameters:
1832673100f5SVaclav Hapla + sf     - star forest
1833cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1834673100f5SVaclav Hapla 
18354165533cSJose E. Roman   Output Parameters:
183620662ed9SBarry Smith + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
183720662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1838673100f5SVaclav Hapla 
1839673100f5SVaclav Hapla   Level: developer
1840673100f5SVaclav Hapla 
1841cab54364SBarry Smith   Note:
184220662ed9SBarry Smith   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1843ffe67aa5SVáclav Hapla 
1844cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1845673100f5SVaclav Hapla @*/
1846d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1847d71ae5a4SJacob Faibussowitsch {
1848673100f5SVaclav Hapla   PetscSF  msf;
1849673100f5SVaclav Hapla   PetscInt i, j, k, nroots, nmroots;
1850673100f5SVaclav Hapla 
1851673100f5SVaclav Hapla   PetscFunctionBegin;
1852673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18539566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18544f572ea9SToby Isaac   if (nroots) PetscAssertPointer(degree, 2);
18554f572ea9SToby Isaac   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18564f572ea9SToby Isaac   PetscAssertPointer(multiRootsOrigNumbering, 4);
18579566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
18589566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1860673100f5SVaclav Hapla   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1861673100f5SVaclav Hapla     if (!degree[i]) continue;
1862ad540459SPierre Jolivet     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1863673100f5SVaclav Hapla   }
186408401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
186566dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
18663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1867673100f5SVaclav Hapla }
1868673100f5SVaclav Hapla 
186995fce210SBarry Smith /*@C
1870cab54364SBarry Smith   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
187195fce210SBarry Smith 
187295fce210SBarry Smith   Collective
187395fce210SBarry Smith 
18744165533cSJose E. Roman   Input Parameters:
187595fce210SBarry Smith + sf       - star forest
187695fce210SBarry Smith . unit     - data type
187795fce210SBarry Smith - leafdata - leaf data to gather to roots
187895fce210SBarry Smith 
18794165533cSJose E. Roman   Output Parameter:
188095fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
188195fce210SBarry Smith 
188295fce210SBarry Smith   Level: intermediate
188395fce210SBarry Smith 
1884cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
188595fce210SBarry Smith @*/
1886d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1887d71ae5a4SJacob Faibussowitsch {
1888a5526d50SJunchao Zhang   PetscSF multi = NULL;
188995fce210SBarry Smith 
189095fce210SBarry Smith   PetscFunctionBegin;
189195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18929566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
18939566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
18949566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
18953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
189695fce210SBarry Smith }
189795fce210SBarry Smith 
189895fce210SBarry Smith /*@C
1899cab54364SBarry Smith   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
190095fce210SBarry Smith 
190195fce210SBarry Smith   Collective
190295fce210SBarry Smith 
19034165533cSJose E. Roman   Input Parameters:
190495fce210SBarry Smith + sf       - star forest
190595fce210SBarry Smith . unit     - data type
190695fce210SBarry Smith - leafdata - leaf data to gather to roots
190795fce210SBarry Smith 
19084165533cSJose E. Roman   Output Parameter:
190995fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
191095fce210SBarry Smith 
191195fce210SBarry Smith   Level: intermediate
191295fce210SBarry Smith 
1913cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
191495fce210SBarry Smith @*/
1915d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1916d71ae5a4SJacob Faibussowitsch {
1917a5526d50SJunchao Zhang   PetscSF multi = NULL;
191895fce210SBarry Smith 
191995fce210SBarry Smith   PetscFunctionBegin;
192095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19219566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19229566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
192495fce210SBarry Smith }
192595fce210SBarry Smith 
192695fce210SBarry Smith /*@C
1927cab54364SBarry Smith   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
192895fce210SBarry Smith 
192995fce210SBarry Smith   Collective
193095fce210SBarry Smith 
19314165533cSJose E. Roman   Input Parameters:
193295fce210SBarry Smith + sf            - star forest
193395fce210SBarry Smith . unit          - data type
193495fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
193595fce210SBarry Smith 
19364165533cSJose E. Roman   Output Parameter:
193795fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
193895fce210SBarry Smith 
193995fce210SBarry Smith   Level: intermediate
194095fce210SBarry Smith 
194120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
194295fce210SBarry Smith @*/
1943d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1944d71ae5a4SJacob Faibussowitsch {
1945a5526d50SJunchao Zhang   PetscSF multi = NULL;
194695fce210SBarry Smith 
194795fce210SBarry Smith   PetscFunctionBegin;
194895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19499566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19509566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19519566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
195395fce210SBarry Smith }
195495fce210SBarry Smith 
195595fce210SBarry Smith /*@C
1956cab54364SBarry Smith   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
195795fce210SBarry Smith 
195895fce210SBarry Smith   Collective
195995fce210SBarry Smith 
19604165533cSJose E. Roman   Input Parameters:
196195fce210SBarry Smith + sf            - star forest
196295fce210SBarry Smith . unit          - data type
196395fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
196495fce210SBarry Smith 
19654165533cSJose E. Roman   Output Parameter:
196695fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
196795fce210SBarry Smith 
196895fce210SBarry Smith   Level: intermediate
196995fce210SBarry Smith 
197020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
197195fce210SBarry Smith @*/
1972d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1973d71ae5a4SJacob Faibussowitsch {
1974a5526d50SJunchao Zhang   PetscSF multi = NULL;
197595fce210SBarry Smith 
197695fce210SBarry Smith   PetscFunctionBegin;
197795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19789566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19799566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
198195fce210SBarry Smith }
1982a7b3aa13SAta Mesgarnejad 
1983d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1984d71ae5a4SJacob Faibussowitsch {
1985a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
1986a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
1987a072220fSLawrence Mitchell   PetscHSetI      seen;
1988a072220fSLawrence Mitchell 
1989a072220fSLawrence Mitchell   PetscFunctionBegin;
1990b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
19919566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
19929566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
1993a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
1994a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
19959566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
1996a072220fSLawrence Mitchell     }
19979566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
199808401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
19999566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
2000b458e8f1SJose E. Roman   }
20013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2002a072220fSLawrence Mitchell }
200354729392SStefano Zampini 
2004a7b3aa13SAta Mesgarnejad /*@
2005cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2006a7b3aa13SAta Mesgarnejad 
2007a7b3aa13SAta Mesgarnejad   Input Parameters:
2008cab54364SBarry Smith + sfA - The first `PetscSF`
2009cab54364SBarry Smith - sfB - The second `PetscSF`
2010a7b3aa13SAta Mesgarnejad 
20112fe279fdSBarry Smith   Output Parameter:
2012cab54364SBarry Smith . sfBA - The composite `PetscSF`
2013a7b3aa13SAta Mesgarnejad 
2014a7b3aa13SAta Mesgarnejad   Level: developer
2015a7b3aa13SAta Mesgarnejad 
2016a072220fSLawrence Mitchell   Notes:
2017cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
201854729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
201954729392SStefano Zampini 
202020662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
202120662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
202220662ed9SBarry Smith   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
202320662ed9SBarry Smith   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2024a072220fSLawrence Mitchell 
2025db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2026a7b3aa13SAta Mesgarnejad @*/
2027d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2028d71ae5a4SJacob Faibussowitsch {
2029a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
2030d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
203154729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
203254729392SStefano Zampini   PetscInt          *localPointsBA;
203354729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
203454729392SStefano Zampini   PetscBool          denseB;
2035a7b3aa13SAta Mesgarnejad 
2036a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
2037a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
203829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
203929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
204029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
204154729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
20424f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
20439566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20449566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
204554729392SStefano Zampini 
20469566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20479566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
204820662ed9SBarry Smith   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
204920662ed9SBarry Smith      numRootsB; otherwise, garbage will be broadcasted.
205020662ed9SBarry Smith      Example (comm size = 1):
205120662ed9SBarry Smith      sfA: 0 <- (0, 0)
205220662ed9SBarry Smith      sfB: 100 <- (0, 0)
205320662ed9SBarry Smith           101 <- (0, 1)
205420662ed9SBarry Smith      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
205520662ed9SBarry Smith      of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
205620662ed9SBarry Smith      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
205720662ed9SBarry Smith      remotePointsA; if not recasted, point 101 would receive a garbage value.             */
20589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
205954729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
206054729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
206154729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
206254729392SStefano Zampini   }
206354729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
20640ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20650ea77edaSksagiyam 
20660ea77edaSksagiyam     if (localp >= numRootsB) continue;
20670ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
206854729392SStefano Zampini   }
2069d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20709566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20720ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20730ea77edaSksagiyam     leafdataB[i].rank  = -1;
20740ea77edaSksagiyam     leafdataB[i].index = -1;
20750ea77edaSksagiyam   }
20768e3a54c0SPierre Jolivet   PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20778e3a54c0SPierre Jolivet   PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20789566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2079d41018fbSJunchao Zhang 
208054729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
208154729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
208254729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
208354729392SStefano Zampini     else numLeavesBA++;
208454729392SStefano Zampini   }
208554729392SStefano Zampini   if (denseB) {
2086d41018fbSJunchao Zhang     localPointsBA  = NULL;
2087d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2088d41018fbSJunchao Zhang   } else {
20899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
20909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
209154729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
209254729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
209354729392SStefano Zampini 
209454729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
209554729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
209654729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
209754729392SStefano Zampini       numLeavesBA++;
209854729392SStefano Zampini     }
20999566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2100d41018fbSJunchao Zhang   }
21019566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21029566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21039566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2105a7b3aa13SAta Mesgarnejad }
21061c6ba672SJunchao Zhang 
210704c0ada0SJunchao Zhang /*@
2108cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
210904c0ada0SJunchao Zhang 
211004c0ada0SJunchao Zhang   Input Parameters:
2111cab54364SBarry Smith + sfA - The first `PetscSF`
2112cab54364SBarry Smith - sfB - The second `PetscSF`
211304c0ada0SJunchao Zhang 
21142fe279fdSBarry Smith   Output Parameter:
2115cab54364SBarry Smith . sfBA - The composite `PetscSF`.
211604c0ada0SJunchao Zhang 
211704c0ada0SJunchao Zhang   Level: developer
211804c0ada0SJunchao Zhang 
211954729392SStefano Zampini   Notes:
212020662ed9SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
212154729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
212220662ed9SBarry Smith   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
212354729392SStefano Zampini 
212420662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
212520662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
212620662ed9SBarry Smith   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
212720662ed9SBarry Smith   on `sfA`, then
212820662ed9SBarry Smith   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
212954729392SStefano Zampini 
2130db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
213104c0ada0SJunchao Zhang @*/
2132d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2133d71ae5a4SJacob Faibussowitsch {
213404c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
213504c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
213604c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
213754729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
213854729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21395b0d146aSStefano Zampini   MPI_Op             op;
21405b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21415b0d146aSStefano Zampini   PetscBool iswin;
21425b0d146aSStefano Zampini #endif
214304c0ada0SJunchao Zhang 
214404c0ada0SJunchao Zhang   PetscFunctionBegin;
214504c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
214604c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
214704c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
214804c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
214954729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
21504f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
21519566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21529566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
215354729392SStefano Zampini 
21549566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21559566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21565b0d146aSStefano Zampini 
21575b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
21585b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
215983df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21605b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
21615b0d146aSStefano Zampini      the root condition is not meet.
21625b0d146aSStefano Zampini    */
21635b0d146aSStefano Zampini   op = MPI_MAXLOC;
21645b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21655b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21669566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
216783df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21685b0d146aSStefano Zampini #endif
21695b0d146aSStefano Zampini 
21709566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
217254729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
217354729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
217454729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
217554729392SStefano Zampini   }
217654729392SStefano Zampini   if (localPointsA) {
217754729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
217854729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
217954729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
218054729392SStefano Zampini     }
218154729392SStefano Zampini   } else {
218254729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
218354729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
218454729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
218554729392SStefano Zampini     }
218654729392SStefano Zampini   }
218754729392SStefano Zampini 
21889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
21899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
219054729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
219154729392SStefano Zampini     remotePointsBA[i].rank  = -1;
219254729392SStefano Zampini     remotePointsBA[i].index = -1;
219354729392SStefano Zampini   }
219454729392SStefano Zampini 
21958e3a54c0SPierre Jolivet   PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
21968e3a54c0SPierre Jolivet   PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
21979566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
219854729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
219954729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
220054729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
220154729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
220254729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
220354729392SStefano Zampini     numLeavesBA++;
220454729392SStefano Zampini   }
22059566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22069566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
22079566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
220904c0ada0SJunchao Zhang }
221004c0ada0SJunchao Zhang 
22111c6ba672SJunchao Zhang /*
2212cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22131c6ba672SJunchao Zhang 
22142fe279fdSBarry Smith   Input Parameter:
2215cab54364SBarry Smith . sf - The global `PetscSF`
22161c6ba672SJunchao Zhang 
22172fe279fdSBarry Smith   Output Parameter:
2218cab54364SBarry Smith . out - The local `PetscSF`
2219cab54364SBarry Smith 
2220cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`
22211c6ba672SJunchao Zhang  */
2222d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2223d71ae5a4SJacob Faibussowitsch {
22241c6ba672SJunchao Zhang   MPI_Comm           comm;
22251c6ba672SJunchao Zhang   PetscMPIInt        myrank;
22261c6ba672SJunchao Zhang   const PetscInt    *ilocal;
22271c6ba672SJunchao Zhang   const PetscSFNode *iremote;
22281c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
22291c6ba672SJunchao Zhang   PetscSFNode       *liremote;
22301c6ba672SJunchao Zhang   PetscSF            lsf;
22311c6ba672SJunchao Zhang 
22321c6ba672SJunchao Zhang   PetscFunctionBegin;
22331c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2234dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2235dbbe0bcdSBarry Smith   else {
22361c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22379566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22389566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22391c6ba672SJunchao Zhang 
22401c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
22419566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22429371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
22439371c9d4SSatish Balay       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
22449371c9d4SSatish Balay     }
22459566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
22469566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
22471c6ba672SJunchao Zhang 
22481c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
22491c6ba672SJunchao Zhang       if (iremote[i].rank == (PetscInt)myrank) {
22501c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22511c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
22521c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
22531c6ba672SJunchao Zhang         j++;
22541c6ba672SJunchao Zhang       }
22551c6ba672SJunchao Zhang     }
22569566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22579566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
22589566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22599566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
22601c6ba672SJunchao Zhang     *out = lsf;
22611c6ba672SJunchao Zhang   }
22623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22631c6ba672SJunchao Zhang }
2264dd5b3ca6SJunchao Zhang 
2265dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2266d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2267d71ae5a4SJacob Faibussowitsch {
2268eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2269dd5b3ca6SJunchao Zhang 
2270dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2271dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22729566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
22739566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22749566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
22759566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2276dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22779566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
22783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2279dd5b3ca6SJunchao Zhang }
2280dd5b3ca6SJunchao Zhang 
2281157edd7aSVaclav Hapla /*@
2282cab54364SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2283157edd7aSVaclav Hapla 
2284157edd7aSVaclav Hapla   Input Parameters:
2285157edd7aSVaclav Hapla + comm        - the communicator
2286cab54364SBarry Smith . nsfs        - the number of input `PetscSF`
2287cab54364SBarry Smith . sfs         - the array of input `PetscSF`
22881f40158dSVaclav Hapla . rootMode    - the root mode specifying how roots are handled
228920662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2290157edd7aSVaclav Hapla 
22912fe279fdSBarry Smith   Output Parameter:
2292cab54364SBarry Smith . newsf - The resulting `PetscSF`
2293157edd7aSVaclav Hapla 
22941f40158dSVaclav Hapla   Level: advanced
2295157edd7aSVaclav Hapla 
2296157edd7aSVaclav Hapla   Notes:
229720662ed9SBarry Smith   The communicator of all `PetscSF`s in `sfs` must be comm.
2298157edd7aSVaclav Hapla 
229920662ed9SBarry Smith   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
230020662ed9SBarry Smith 
230120662ed9SBarry Smith   The offsets in `leafOffsets` are added to the original leaf indices.
230220662ed9SBarry Smith 
230320662ed9SBarry Smith   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
230420662ed9SBarry Smith   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
230520662ed9SBarry Smith 
230620662ed9SBarry Smith   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2307157edd7aSVaclav Hapla   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2308157edd7aSVaclav Hapla 
230920662ed9SBarry Smith   All root modes retain the essential connectivity condition.
231020662ed9SBarry Smith   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
231120662ed9SBarry Smith   Parameter `rootMode` controls how the input root spaces are combined.
231220662ed9SBarry Smith   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
231320662ed9SBarry Smith   and is also the same in the output `PetscSF`.
23141f40158dSVaclav Hapla   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23151f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
231620662ed9SBarry Smith   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23171f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23181593df67SStefano Zampini   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
23191f40158dSVaclav Hapla   the original root ranks are ignored.
23201f40158dSVaclav Hapla   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
232120662ed9SBarry Smith   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
232220662ed9SBarry Smith   to keep the load balancing.
232320662ed9SBarry Smith   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
23241f40158dSVaclav Hapla 
23251f40158dSVaclav Hapla   Example:
23261f40158dSVaclav Hapla   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
232720662ed9SBarry Smith .vb
232820662ed9SBarry Smith   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
232920662ed9SBarry Smith   for m in {local,global,shared}; do
233020662ed9SBarry Smith     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
233120662ed9SBarry Smith   done
233220662ed9SBarry Smith .ve
233320662ed9SBarry Smith   we generate two identical `PetscSF`s sf_0 and sf_1,
233420662ed9SBarry Smith .vb
233520662ed9SBarry Smith   PetscSF Object: sf_0 2 MPI processes
233620662ed9SBarry Smith     type: basic
233720662ed9SBarry Smith     rank #leaves #roots
233820662ed9SBarry Smith     [ 0]       4      2
233920662ed9SBarry Smith     [ 1]       4      2
234020662ed9SBarry Smith     leaves      roots       roots in global numbering
234120662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
234220662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
234320662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
234420662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
234520662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
234620662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
234720662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
234820662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
234920662ed9SBarry Smith .ve
2350e33f79d8SJacob Faibussowitsch   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
235120662ed9SBarry Smith .vb
235220662ed9SBarry Smith   rootMode = local:
235320662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
235420662ed9SBarry Smith     type: basic
235520662ed9SBarry Smith     rank #leaves #roots
235620662ed9SBarry Smith     [ 0]       8      4
235720662ed9SBarry Smith     [ 1]       8      4
235820662ed9SBarry Smith     leaves      roots       roots in global numbering
235920662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
236020662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
236120662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   4
236220662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   5
236320662ed9SBarry Smith     ( 0,  4) <- ( 0,  2)  =   2
236420662ed9SBarry Smith     ( 0,  5) <- ( 0,  3)  =   3
236520662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
236620662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
236720662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
236820662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
236920662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   4
237020662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   5
237120662ed9SBarry Smith     ( 1,  4) <- ( 0,  2)  =   2
237220662ed9SBarry Smith     ( 1,  5) <- ( 0,  3)  =   3
237320662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
237420662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
237520662ed9SBarry Smith 
237620662ed9SBarry Smith   rootMode = global:
237720662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
237820662ed9SBarry Smith     type: basic
237920662ed9SBarry Smith     rank #leaves #roots
238020662ed9SBarry Smith     [ 0]       8      4
238120662ed9SBarry Smith     [ 1]       8      4
238220662ed9SBarry Smith     leaves      roots       roots in global numbering
238320662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
238420662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
238520662ed9SBarry Smith     ( 0,  2) <- ( 0,  2)  =   2
238620662ed9SBarry Smith     ( 0,  3) <- ( 0,  3)  =   3
238720662ed9SBarry Smith     ( 0,  4) <- ( 1,  0)  =   4
238820662ed9SBarry Smith     ( 0,  5) <- ( 1,  1)  =   5
238920662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
239020662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
239120662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
239220662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
239320662ed9SBarry Smith     ( 1,  2) <- ( 0,  2)  =   2
239420662ed9SBarry Smith     ( 1,  3) <- ( 0,  3)  =   3
239520662ed9SBarry Smith     ( 1,  4) <- ( 1,  0)  =   4
239620662ed9SBarry Smith     ( 1,  5) <- ( 1,  1)  =   5
239720662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
239820662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
239920662ed9SBarry Smith 
240020662ed9SBarry Smith   rootMode = shared:
240120662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
240220662ed9SBarry Smith     type: basic
240320662ed9SBarry Smith     rank #leaves #roots
240420662ed9SBarry Smith     [ 0]       8      2
240520662ed9SBarry Smith     [ 1]       8      2
240620662ed9SBarry Smith     leaves      roots       roots in global numbering
240720662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
240820662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
240920662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
241020662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
241120662ed9SBarry Smith     ( 0,  4) <- ( 0,  0)  =   0
241220662ed9SBarry Smith     ( 0,  5) <- ( 0,  1)  =   1
241320662ed9SBarry Smith     ( 0,  6) <- ( 1,  0)  =   2
241420662ed9SBarry Smith     ( 0,  7) <- ( 1,  1)  =   3
241520662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
241620662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
241720662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
241820662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
241920662ed9SBarry Smith     ( 1,  4) <- ( 0,  0)  =   0
242020662ed9SBarry Smith     ( 1,  5) <- ( 0,  1)  =   1
242120662ed9SBarry Smith     ( 1,  6) <- ( 1,  0)  =   2
242220662ed9SBarry Smith     ( 1,  7) <- ( 1,  1)  =   3
242320662ed9SBarry Smith .ve
24241f40158dSVaclav Hapla 
24251f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2426157edd7aSVaclav Hapla @*/
24271f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2428d71ae5a4SJacob Faibussowitsch {
2429157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2430157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2431157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2432157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2433157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
24341f40158dSVaclav Hapla   PetscLayout  glayout         = NULL;
24351f40158dSVaclav Hapla   PetscInt    *gremote         = NULL;
24361f40158dSVaclav Hapla   PetscMPIInt  rank, size;
2437157edd7aSVaclav Hapla 
2438157edd7aSVaclav Hapla   PetscFunctionBegin;
243912f479c1SVaclav Hapla   if (PetscDefined(USE_DEBUG)) {
2440157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2441157edd7aSVaclav Hapla 
24429566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2443157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24444f572ea9SToby Isaac     PetscAssertPointer(sfs, 3);
2445157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2446157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2447157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2448157edd7aSVaclav Hapla     }
24491f40158dSVaclav Hapla     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24504f572ea9SToby Isaac     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24514f572ea9SToby Isaac     PetscAssertPointer(newsf, 6);
24529566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2453157edd7aSVaclav Hapla   }
2454157edd7aSVaclav Hapla   if (!nsfs) {
24559566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
24569566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24573ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2458157edd7aSVaclav Hapla   }
24599566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
24601f40158dSVaclav Hapla   PetscCallMPI(MPI_Comm_size(comm, &size));
2461157edd7aSVaclav Hapla 
24621f40158dSVaclav Hapla   /* Calculate leaf array offsets */
24639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2464157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2465157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2466157edd7aSVaclav Hapla     PetscInt nl;
2467157edd7aSVaclav Hapla 
24689566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2469157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2470157edd7aSVaclav Hapla   }
2471157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2472157edd7aSVaclav Hapla 
24731f40158dSVaclav Hapla   /* Calculate number of roots */
24741f40158dSVaclav Hapla   switch (rootMode) {
24751f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
24761f40158dSVaclav Hapla     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
24771f40158dSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
24781f40158dSVaclav Hapla       for (s = 1; s < nsfs; s++) {
24791f40158dSVaclav Hapla         PetscInt nr;
24801f40158dSVaclav Hapla 
24811f40158dSVaclav Hapla         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
24821f40158dSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
24831f40158dSVaclav Hapla       }
24841f40158dSVaclav Hapla     }
24851f40158dSVaclav Hapla   } break;
24861f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
24871f40158dSVaclav Hapla     /* Calculate also global layout in this case */
24881f40158dSVaclav Hapla     PetscInt    *nls;
24891f40158dSVaclav Hapla     PetscLayout *lts;
24901f40158dSVaclav Hapla     PetscInt   **inds;
24911f40158dSVaclav Hapla     PetscInt     j;
24921f40158dSVaclav Hapla     PetscInt     rootOffset = 0;
24931f40158dSVaclav Hapla 
24941f40158dSVaclav Hapla     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
24951f40158dSVaclav Hapla     PetscCall(PetscLayoutCreate(comm, &glayout));
24961f40158dSVaclav Hapla     glayout->bs = 1;
24971f40158dSVaclav Hapla     glayout->n  = 0;
24981f40158dSVaclav Hapla     glayout->N  = 0;
24991f40158dSVaclav Hapla     for (s = 0; s < nsfs; s++) {
25001f40158dSVaclav Hapla       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
25011f40158dSVaclav Hapla       glayout->n += lts[s]->n;
25021f40158dSVaclav Hapla       glayout->N += lts[s]->N;
25031f40158dSVaclav Hapla     }
25041f40158dSVaclav Hapla     PetscCall(PetscLayoutSetUp(glayout));
25051f40158dSVaclav Hapla     PetscCall(PetscMalloc1(nLeaves, &gremote));
25061f40158dSVaclav Hapla     for (s = 0, j = 0; s < nsfs; s++) {
25071f40158dSVaclav Hapla       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25081f40158dSVaclav Hapla       rootOffset += lts[s]->N;
25091f40158dSVaclav Hapla       PetscCall(PetscLayoutDestroy(&lts[s]));
25101f40158dSVaclav Hapla       PetscCall(PetscFree(inds[s]));
25111f40158dSVaclav Hapla     }
25121f40158dSVaclav Hapla     PetscCall(PetscFree3(lts, nls, inds));
25131f40158dSVaclav Hapla     nRoots = glayout->N;
25141f40158dSVaclav Hapla   } break;
25151f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25161f40158dSVaclav Hapla     /* nRoots calculated later in this case */
25171f40158dSVaclav Hapla     break;
25181f40158dSVaclav Hapla   default:
25191f40158dSVaclav Hapla     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25201f40158dSVaclav Hapla   }
25211f40158dSVaclav Hapla 
2522157edd7aSVaclav Hapla   if (!leafOffsets) {
2523157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2524157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2525157edd7aSVaclav Hapla       const PetscInt *ilocal;
2526157edd7aSVaclav Hapla 
25279566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2528157edd7aSVaclav Hapla       if (ilocal) {
2529157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2530157edd7aSVaclav Hapla         break;
2531157edd7aSVaclav Hapla       }
2532157edd7aSVaclav Hapla     }
2533157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2534157edd7aSVaclav Hapla   }
2535157edd7aSVaclav Hapla 
2536157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2537157edd7aSVaclav Hapla   ilocal_new = NULL;
2538157edd7aSVaclav Hapla   if (!all_ilocal_null) {
25399566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2540157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2541157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2542157edd7aSVaclav Hapla       const PetscInt *ilocal;
25438e3a54c0SPierre Jolivet       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2544157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2545157edd7aSVaclav Hapla 
25469566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2547157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2548157edd7aSVaclav Hapla     }
2549157edd7aSVaclav Hapla   }
2550157edd7aSVaclav Hapla 
2551157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
25521f40158dSVaclav Hapla   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25531f40158dSVaclav Hapla     PetscInt rootOffset = 0;
25541f40158dSVaclav Hapla 
25559566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2556157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) {
2557157edd7aSVaclav Hapla       iremote_new[i].rank  = -1;
2558157edd7aSVaclav Hapla       iremote_new[i].index = -1;
2559157edd7aSVaclav Hapla     }
2560157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2561157edd7aSVaclav Hapla       PetscInt           i, nl, nr;
2562157edd7aSVaclav Hapla       PetscSF            tmp_sf;
2563157edd7aSVaclav Hapla       const PetscSFNode *iremote;
2564157edd7aSVaclav Hapla       PetscSFNode       *tmp_rootdata;
25658e3a54c0SPierre Jolivet       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2566157edd7aSVaclav Hapla 
25679566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25689566063dSJacob Faibussowitsch       PetscCall(PetscSFCreate(comm, &tmp_sf));
2569157edd7aSVaclav Hapla       /* create helper SF with contiguous leaves */
25709566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25719566063dSJacob Faibussowitsch       PetscCall(PetscSFSetUp(tmp_sf));
25729566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
25731f40158dSVaclav Hapla       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2574157edd7aSVaclav Hapla         for (i = 0; i < nr; i++) {
25751f40158dSVaclav Hapla           tmp_rootdata[i].index = i + rootOffset;
2576157edd7aSVaclav Hapla           tmp_rootdata[i].rank  = (PetscInt)rank;
2577157edd7aSVaclav Hapla         }
25781f40158dSVaclav Hapla         rootOffset += nr;
25791f40158dSVaclav Hapla       } else {
25801f40158dSVaclav Hapla         for (i = 0; i < nr; i++) {
25811f40158dSVaclav Hapla           tmp_rootdata[i].index = i;
25821f40158dSVaclav Hapla           tmp_rootdata[i].rank  = (PetscInt)rank;
25831f40158dSVaclav Hapla         }
25841f40158dSVaclav Hapla       }
25859566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
25869566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
25879566063dSJacob Faibussowitsch       PetscCall(PetscSFDestroy(&tmp_sf));
25889566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp_rootdata));
2589157edd7aSVaclav Hapla     }
2590aa624791SPierre Jolivet     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2591157edd7aSVaclav Hapla 
2592157edd7aSVaclav Hapla     /* Build the new SF */
25939566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
25949566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
25951f40158dSVaclav Hapla   } else {
25961f40158dSVaclav Hapla     /* Build the new SF */
25971f40158dSVaclav Hapla     PetscCall(PetscSFCreate(comm, newsf));
25981f40158dSVaclav Hapla     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
25991f40158dSVaclav Hapla   }
26009566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
26011f40158dSVaclav Hapla   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26021f40158dSVaclav Hapla   PetscCall(PetscLayoutDestroy(&glayout));
26031f40158dSVaclav Hapla   PetscCall(PetscFree(gremote));
26049566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
26053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2606157edd7aSVaclav Hapla }
26078e54d7e8SToby Isaac 
26088e54d7e8SToby Isaac /*@
26098e54d7e8SToby Isaac   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
26108e54d7e8SToby Isaac 
26118e54d7e8SToby Isaac   Collective
26128e54d7e8SToby Isaac 
26138e54d7e8SToby Isaac   Input Parameters:
26148e54d7e8SToby Isaac + sf       - star forest
26158e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
26168e54d7e8SToby Isaac . rootdata - root data that will be used for muliple PetscSF communications
26178e54d7e8SToby Isaac - leafdata - leaf data that will be used for muliple PetscSF communications
26188e54d7e8SToby Isaac 
26198e54d7e8SToby Isaac   Level: advanced
26208e54d7e8SToby Isaac 
26218e54d7e8SToby Isaac   Notes:
26228e54d7e8SToby Isaac   Implementations of `PetscSF` can make optimizations
26238e54d7e8SToby Isaac   for repeated communication using the same memory regions, but these optimizations
26248e54d7e8SToby Isaac   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
26258e54d7e8SToby Isaac   The intended pattern is
26268e54d7e8SToby Isaac 
26278e54d7e8SToby Isaac .vb
26288e54d7e8SToby Isaac   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
26298e54d7e8SToby Isaac 
26308e54d7e8SToby Isaac   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
26318e54d7e8SToby Isaac   // repeated use of rootdata and leafdata will now be optimized
26328e54d7e8SToby Isaac 
26338e54d7e8SToby Isaac   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
26348e54d7e8SToby Isaac   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
26358e54d7e8SToby Isaac   // ...
26368e54d7e8SToby Isaac   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
26378e54d7e8SToby Isaac   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
26388e54d7e8SToby Isaac   // ... (other communications)
26398e54d7e8SToby Isaac 
26408e54d7e8SToby Isaac   // rootdata and leafdata must be deregistered before freeing
26418e54d7e8SToby Isaac   // skipping this can lead to undefined behavior including
26428e54d7e8SToby Isaac   // deadlocks
26438e54d7e8SToby Isaac   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
26448e54d7e8SToby Isaac 
26458e54d7e8SToby Isaac   // it is now safe to free rootdata and leafdata
26468e54d7e8SToby Isaac   PetscFree2(rootdata, leafdata);
26478e54d7e8SToby Isaac .ve
26488e54d7e8SToby Isaac 
26498e54d7e8SToby Isaac   If you do not register `rootdata` and `leafdata` it will not cause an error,
26508e54d7e8SToby Isaac   but optimizations that reduce the setup time for each communication cannot be
26518e54d7e8SToby Isaac   made.  Currently, the only implementation of `PetscSF` that benefits from
26528e54d7e8SToby Isaac   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
26538e54d7e8SToby Isaac   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
26548e54d7e8SToby Isaac 
26558e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
26568e54d7e8SToby Isaac @*/
26578e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26588e54d7e8SToby Isaac {
26598e54d7e8SToby Isaac   PetscFunctionBegin;
26608e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26618e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26628e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
26638e54d7e8SToby Isaac }
26648e54d7e8SToby Isaac 
26658e54d7e8SToby Isaac /*@
26668e54d7e8SToby Isaac   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
26678e54d7e8SToby Isaac 
26688e54d7e8SToby Isaac   Collective
26698e54d7e8SToby Isaac 
26708e54d7e8SToby Isaac   Input Parameters:
26718e54d7e8SToby Isaac + sf       - star forest
26728e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
26738e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
26748e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
26758e54d7e8SToby Isaac 
26768e54d7e8SToby Isaac   Level: advanced
26778e54d7e8SToby Isaac 
26788e54d7e8SToby Isaac   Note:
26798e54d7e8SToby Isaac   See `PetscSFRegisterPersistent()` for when/how to use this function.
26808e54d7e8SToby Isaac 
26818e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
26828e54d7e8SToby Isaac @*/
26838e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26848e54d7e8SToby Isaac {
26858e54d7e8SToby Isaac   PetscFunctionBegin;
26868e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26878e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26888e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
26898e54d7e8SToby Isaac }
2690*e1187f0dSToby Isaac 
2691*e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2692*e1187f0dSToby Isaac {
2693*e1187f0dSToby Isaac   MPI_Aint lb, lb_true, bytes, bytes_true;
2694*e1187f0dSToby Isaac 
2695*e1187f0dSToby Isaac   PetscFunctionBegin;
2696*e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2697*e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2698*e1187f0dSToby Isaac   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2699*e1187f0dSToby Isaac   *size = bytes;
2700*e1187f0dSToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
2701*e1187f0dSToby Isaac }
2702