xref: /petsc/src/vec/is/sf/interface/sf.c (revision 9c9354e5ab274edd4fa0dcd5854b794f85fd380b)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
77fd2d3dbSJunchao Zhang   #include <cuda_runtime.h>
8715b587bSJunchao Zhang   #include <petscdevice_cuda.h>
97fd2d3dbSJunchao Zhang #endif
107fd2d3dbSJunchao Zhang 
117fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
127fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
137fd2d3dbSJunchao Zhang #endif
147fd2d3dbSJunchao Zhang 
/*
  PetscSFCheckGraphSet - verify that the graph of argument number `arg` has been set

  Static-analyzer builds only see a prototype, debug builds call PetscCheck() on (sf)->graphset
  with PETSC_ERR_ARG_WRONGSTATE, and every other build expands to an empty statement.
*/
#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#elif defined(PETSC_USE_DEBUG)
  #define PetscSFCheckGraphSet(sf, arg) \
    PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
#else
  #define PetscSFCheckGraphSet(sf, arg) \
    do { \
    } while (0)
#endif
2695fce210SBarry Smith 
/* String tables for the SF enums: the value names, followed by the enum type name, the enum prefix and a NULL terminator */
const char *const PetscSFDuplicateOptions[] = {
  "CONFONLY",
  "RANKS",
  "GRAPH",
  "PetscSFDuplicateOption",
  "PETSCSF_DUPLICATE_",
  NULL,
};
const char *const PetscSFConcatenateRootModes[] = {
  "local",
  "shared",
  "global",
  "PetscSFConcatenateRootMode",
  "PETSCSF_CONCATENATE_ROOTMODE_",
  NULL,
};
2995fce210SBarry Smith 
308af6ec1cSBarry Smith /*@
3195fce210SBarry Smith   PetscSFCreate - create a star forest communication context
3295fce210SBarry Smith 
33d083f849SBarry Smith   Collective
3495fce210SBarry Smith 
354165533cSJose E. Roman   Input Parameter:
3695fce210SBarry Smith . comm - communicator on which the star forest will operate
3795fce210SBarry Smith 
384165533cSJose E. Roman   Output Parameter:
3995fce210SBarry Smith . sf - new star forest context
4095fce210SBarry Smith 
4120662ed9SBarry Smith   Options Database Key:
426677b1c1SJunchao Zhang + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
436677b1c1SJunchao Zhang . -sf_type window                - Use MPI-3 one-sided window for communication
446677b1c1SJunchao Zhang . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
456677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46dd5b3ca6SJunchao Zhang 
4795fce210SBarry Smith   Level: intermediate
4895fce210SBarry Smith 
49cab54364SBarry Smith   Note:
50cab54364SBarry Smith   When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51cab54364SBarry Smith   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
5220662ed9SBarry Smith   `SF`s are optimized and they have better performance than the general `SF`s.
53dd5b3ca6SJunchao Zhang 
5438b5cf2dSJacob Faibussowitsch .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5595fce210SBarry Smith @*/
56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57d71ae5a4SJacob Faibussowitsch {
5895fce210SBarry Smith   PetscSF b;
5995fce210SBarry Smith 
6095fce210SBarry Smith   PetscFunctionBegin;
614f572ea9SToby Isaac   PetscAssertPointer(sf, 2);
629566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6395fce210SBarry Smith 
649566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6595fce210SBarry Smith   b->nroots    = -1;
6695fce210SBarry Smith   b->nleaves   = -1;
671690c2aeSBarry Smith   b->minleaf   = PETSC_INT_MAX;
681690c2aeSBarry Smith   b->maxleaf   = PETSC_INT_MIN;
6995fce210SBarry Smith   b->nranks    = -1;
7095fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7195fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7295fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7395fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7520c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7620c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7771438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7827f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
7920c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
8027f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8127f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8259af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8359af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8420c24465SJunchao Zhang   #endif
8571438e86SJunchao Zhang 
8671438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8771438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8871438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
899566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
909566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9171438e86SJunchao Zhang   #endif
9220c24465SJunchao Zhang #endif
9360c22052SBarry Smith   b->vscat.from_n = -1;
9460c22052SBarry Smith   b->vscat.to_n   = -1;
9560c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9695fce210SBarry Smith   *sf             = b;
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9895fce210SBarry Smith }
9995fce210SBarry Smith 
10029046d53SLisandro Dalcin /*@
10195fce210SBarry Smith   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10295fce210SBarry Smith 
10395fce210SBarry Smith   Collective
10495fce210SBarry Smith 
1054165533cSJose E. Roman   Input Parameter:
10695fce210SBarry Smith . sf - star forest
10795fce210SBarry Smith 
10895fce210SBarry Smith   Level: advanced
10995fce210SBarry Smith 
110cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11195fce210SBarry Smith @*/
112d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
113d71ae5a4SJacob Faibussowitsch {
11495fce210SBarry Smith   PetscFunctionBegin;
11595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
116dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
1170dd791a8SStefano Zampini   PetscCall(PetscSFDestroy(&sf->rankssf));
1180dd791a8SStefano Zampini 
11929046d53SLisandro Dalcin   sf->nroots   = -1;
12029046d53SLisandro Dalcin   sf->nleaves  = -1;
1211690c2aeSBarry Smith   sf->minleaf  = PETSC_INT_MAX;
1221690c2aeSBarry Smith   sf->maxleaf  = PETSC_INT_MIN;
12395fce210SBarry Smith   sf->mine     = NULL;
12495fce210SBarry Smith   sf->remote   = NULL;
12529046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1279566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12821c688dcSJed Brown   sf->nranks = -1;
1299566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
13029046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1319566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1329566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1339566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1340dd791a8SStefano Zampini 
135013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1369566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1370dd791a8SStefano Zampini 
1389566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
13971438e86SJunchao Zhang 
14071438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1419566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14271438e86SJunchao Zhang #endif
14371438e86SJunchao Zhang 
14495fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
1453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14695fce210SBarry Smith }
14795fce210SBarry Smith 
148cc4c1da9SBarry Smith /*@
149cab54364SBarry Smith   PetscSFSetType - Set the `PetscSF` communication implementation
15095fce210SBarry Smith 
151c3339decSBarry Smith   Collective
15295fce210SBarry Smith 
15395fce210SBarry Smith   Input Parameters:
154cab54364SBarry Smith + sf   - the `PetscSF` context
15595fce210SBarry Smith - type - a known method
156cab54364SBarry Smith .vb
157cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
158cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159cab54364SBarry Smith .ve
16095fce210SBarry Smith 
16195fce210SBarry Smith   Options Database Key:
16220662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
163cab54364SBarry Smith 
164cab54364SBarry Smith   Level: intermediate
16595fce210SBarry Smith 
16695fce210SBarry Smith   Notes:
16720662ed9SBarry Smith   See `PetscSFType` for possible values
16895fce210SBarry Smith 
16920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
17095fce210SBarry Smith @*/
171d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172d71ae5a4SJacob Faibussowitsch {
17395fce210SBarry Smith   PetscBool match;
1745f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17595fce210SBarry Smith 
17695fce210SBarry Smith   PetscFunctionBegin;
17795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1784f572ea9SToby Isaac   PetscAssertPointer(type, 2);
17995fce210SBarry Smith 
1809566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1813ba16761SJacob Faibussowitsch   if (match) PetscFunctionReturn(PETSC_SUCCESS);
18295fce210SBarry Smith 
1839566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1846adde796SStefano Zampini   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18529046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
186dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1879566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1889566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1899566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19195fce210SBarry Smith }
19295fce210SBarry Smith 
193cc4c1da9SBarry Smith /*@
194cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19529046d53SLisandro Dalcin 
19629046d53SLisandro Dalcin   Not Collective
19729046d53SLisandro Dalcin 
19829046d53SLisandro Dalcin   Input Parameter:
199cab54364SBarry Smith . sf - the `PetscSF` context
20029046d53SLisandro Dalcin 
20129046d53SLisandro Dalcin   Output Parameter:
202cab54364SBarry Smith . type - the `PetscSF` type name
20329046d53SLisandro Dalcin 
20429046d53SLisandro Dalcin   Level: intermediate
20529046d53SLisandro Dalcin 
20620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20729046d53SLisandro Dalcin @*/
208d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209d71ae5a4SJacob Faibussowitsch {
21029046d53SLisandro Dalcin   PetscFunctionBegin;
21129046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2124f572ea9SToby Isaac   PetscAssertPointer(type, 2);
21329046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21529046d53SLisandro Dalcin }
21629046d53SLisandro Dalcin 
2170764c050SBarry Smith /*@
21820662ed9SBarry Smith   PetscSFDestroy - destroy a star forest
21995fce210SBarry Smith 
22095fce210SBarry Smith   Collective
22195fce210SBarry Smith 
2224165533cSJose E. Roman   Input Parameter:
22395fce210SBarry Smith . sf - address of star forest
22495fce210SBarry Smith 
22595fce210SBarry Smith   Level: intermediate
22695fce210SBarry Smith 
22720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22895fce210SBarry Smith @*/
229d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
230d71ae5a4SJacob Faibussowitsch {
23195fce210SBarry Smith   PetscFunctionBegin;
2323ba16761SJacob Faibussowitsch   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233f4f49eeaSPierre Jolivet   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
234f4f49eeaSPierre Jolivet   if (--((PetscObject)*sf)->refct > 0) {
2359371c9d4SSatish Balay     *sf = NULL;
2363ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2379371c9d4SSatish Balay   }
2389566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
239f4f49eeaSPierre Jolivet   PetscTryTypeMethod(*sf, Destroy);
2409566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2419566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243715b587bSJunchao Zhang   if ((*sf)->use_stream_aware_mpi) {
244715b587bSJunchao Zhang     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245715b587bSJunchao Zhang     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246715b587bSJunchao Zhang   }
247715b587bSJunchao Zhang #endif
2489566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
2493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25095fce210SBarry Smith }
25195fce210SBarry Smith 
252d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253d71ae5a4SJacob Faibussowitsch {
254c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
255c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
256c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
257c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
258c4e6a40aSLawrence Mitchell 
259c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
2603ba16761SJacob Faibussowitsch   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2619566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
264c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
265c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
266c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26808401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26908401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270c4e6a40aSLawrence Mitchell   }
2713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
272c4e6a40aSLawrence Mitchell }
273c4e6a40aSLawrence Mitchell 
27495fce210SBarry Smith /*@
27520662ed9SBarry Smith   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27695fce210SBarry Smith 
27795fce210SBarry Smith   Collective
27895fce210SBarry Smith 
2794165533cSJose E. Roman   Input Parameter:
28095fce210SBarry Smith . sf - star forest communication object
28195fce210SBarry Smith 
28295fce210SBarry Smith   Level: beginner
28395fce210SBarry Smith 
28420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28595fce210SBarry Smith @*/
286d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
287d71ae5a4SJacob Faibussowitsch {
28895fce210SBarry Smith   PetscFunctionBegin;
28929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
29029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
2913ba16761SJacob Faibussowitsch   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2929566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2939566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2949566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
29620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29720c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
29871438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
29971438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
30020c24465SJunchao Zhang   }
30120c24465SJunchao Zhang #endif
30259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30359af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
30459af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
30559af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
30659af0bd3SScott Kruger   }
30759af0bd3SScott Kruger #endif
30820c24465SJunchao Zhang 
30920c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
31020c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31120c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31220c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31320c24465SJunchao Zhang   }
31420c24465SJunchao Zhang #endif
3159566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31695fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
3173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31895fce210SBarry Smith }
31995fce210SBarry Smith 
3208af6ec1cSBarry Smith /*@
321cab54364SBarry Smith   PetscSFSetFromOptions - set `PetscSF` options using the options database
32295fce210SBarry Smith 
32395fce210SBarry Smith   Logically Collective
32495fce210SBarry Smith 
3254165533cSJose E. Roman   Input Parameter:
32695fce210SBarry Smith . sf - star forest
32795fce210SBarry Smith 
32895fce210SBarry Smith   Options Database Keys:
32920662ed9SBarry Smith + -sf_type                      - implementation type, see `PetscSFSetType()`
33051ccb202SJunchao Zhang . -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
33120662ed9SBarry Smith . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
33220662ed9SBarry Smith                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
33320662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33420662ed9SBarry Smith . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
33520662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33695fce210SBarry Smith 
3376497c311SBarry Smith - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
33859af0bd3SScott Kruger                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
33920c24465SJunchao Zhang                                   the only available is kokkos.
34020c24465SJunchao Zhang 
34195fce210SBarry Smith   Level: intermediate
342cab54364SBarry Smith 
343cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34495fce210SBarry Smith @*/
345d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346d71ae5a4SJacob Faibussowitsch {
34795fce210SBarry Smith   PetscSFType deft;
34895fce210SBarry Smith   char        type[256];
34995fce210SBarry Smith   PetscBool   flg;
35095fce210SBarry Smith 
35195fce210SBarry Smith   PetscFunctionBegin;
35295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
353d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
35495fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3559566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3569566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3579566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
358f9334340SJunchao Zhang   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
3597fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
36020c24465SJunchao Zhang   {
36120c24465SJunchao Zhang     char      backendstr[32] = {0};
36259af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36320c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
364d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3669566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3679566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3689566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3699566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
37059af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37120c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37220c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37359af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37428b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37520c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37608401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37720c24465SJunchao Zhang   #endif
378715b587bSJunchao Zhang 
379715b587bSJunchao Zhang   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380715b587bSJunchao Zhang     if (sf->use_stream_aware_mpi) {
381715b587bSJunchao Zhang       MPI_Info info;
382715b587bSJunchao Zhang 
383715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_create(&info));
384715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385715b587bSJunchao Zhang       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
388715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389715b587bSJunchao Zhang     }
390715b587bSJunchao Zhang   #endif
39120c24465SJunchao Zhang   }
392c2a741eeSJunchao Zhang #endif
393dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394d0609cedSBarry Smith   PetscOptionsEnd();
3953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39695fce210SBarry Smith }
39795fce210SBarry Smith 
39829046d53SLisandro Dalcin /*@
39995fce210SBarry Smith   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
40095fce210SBarry Smith 
40195fce210SBarry Smith   Logically Collective
40295fce210SBarry Smith 
4034165533cSJose E. Roman   Input Parameters:
40495fce210SBarry Smith + sf  - star forest
405cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
40695fce210SBarry Smith 
40795fce210SBarry Smith   Level: advanced
40895fce210SBarry Smith 
40920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
41095fce210SBarry Smith @*/
411d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412d71ae5a4SJacob Faibussowitsch {
41395fce210SBarry Smith   PetscFunctionBegin;
41495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41595fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
41628b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41795fce210SBarry Smith   sf->rankorder = flg;
4183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41995fce210SBarry Smith }
42095fce210SBarry Smith 
4215d83a8b1SBarry Smith /*@
42295fce210SBarry Smith   PetscSFSetGraph - Set a parallel star forest
42395fce210SBarry Smith 
42495fce210SBarry Smith   Collective
42595fce210SBarry Smith 
4264165533cSJose E. Roman   Input Parameters:
42795fce210SBarry Smith + sf         - star forest
42895fce210SBarry Smith . nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
42995fce210SBarry Smith . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
43020662ed9SBarry Smith . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431c4e6a40aSLawrence Mitchell                during setup in debug mode)
43220662ed9SBarry Smith . localmode  - copy mode for `ilocal`
. iremote    - remote locations of root vertices for each leaf on the current process, length is 2 `nleaves`
43426a11704SBarry Smith                (locations must be >= 0, enforced during setup in debug mode)
43520662ed9SBarry Smith - remotemode - copy mode for `iremote`
43695fce210SBarry Smith 
43795fce210SBarry Smith   Level: intermediate
43895fce210SBarry Smith 
43995452b02SPatrick Sanan   Notes:
44020662ed9SBarry Smith   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
44138ab3f8aSBarry Smith 
44220662ed9SBarry Smith   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44320662ed9SBarry Smith   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444db2b9530SVaclav Hapla   PETSc might modify the respective array;
44520662ed9SBarry Smith   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
446cab54364SBarry Smith   Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
447db2b9530SVaclav Hapla 
44838b5cf2dSJacob Faibussowitsch   Fortran Notes:
44920662ed9SBarry Smith   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
450c4e6a40aSLawrence Mitchell 
45138b5cf2dSJacob Faibussowitsch   Developer Notes:
452db2b9530SVaclav Hapla   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
45320662ed9SBarry Smith   This also allows to compare leaf sets of two `PetscSF`s easily.
45472bf8598SVaclav Hapla 
45520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
45695fce210SBarry Smith @*/
457*9c9354e5SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
458d71ae5a4SJacob Faibussowitsch {
459db2b9530SVaclav Hapla   PetscBool unique, contiguous;
46095fce210SBarry Smith 
46195fce210SBarry Smith   PetscFunctionBegin;
46295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4634f572ea9SToby Isaac   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4644f572ea9SToby Isaac   if (nleaves > 0) PetscAssertPointer(iremote, 6);
46508401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46608401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4678da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4688da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4698da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4708da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47129046d53SLisandro Dalcin 
4722a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4739566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4742a67d2daSStefano Zampini   }
4752a67d2daSStefano Zampini 
4769566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
4776497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
4786497c311SBarry Smith     PetscMPIInt size;
4796497c311SBarry Smith 
4806497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
4816497c311SBarry Smith     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
4826497c311SBarry Smith   }
48329046d53SLisandro Dalcin 
48495fce210SBarry Smith   sf->nroots  = nroots;
48595fce210SBarry Smith   sf->nleaves = nleaves;
48629046d53SLisandro Dalcin 
487db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
488db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
489db2b9530SVaclav Hapla 
4909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4919566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
492db2b9530SVaclav Hapla     ilocal = tlocal;
493db2b9530SVaclav Hapla   }
494db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
495db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
496db2b9530SVaclav Hapla 
4979566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4989566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
499db2b9530SVaclav Hapla     iremote = tremote;
500db2b9530SVaclav Hapla   }
501db2b9530SVaclav Hapla 
50229046d53SLisandro Dalcin   if (nleaves && ilocal) {
503db2b9530SVaclav Hapla     PetscSFNode work;
504db2b9530SVaclav Hapla 
5059566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
5069566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
507db2b9530SVaclav Hapla     unique = PetscNot(unique);
508db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
509db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
510db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
511db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
51229046d53SLisandro Dalcin   } else {
51329046d53SLisandro Dalcin     sf->minleaf = 0;
51429046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
515db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
516db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
51729046d53SLisandro Dalcin   }
51829046d53SLisandro Dalcin 
519db2b9530SVaclav Hapla   if (contiguous) {
520db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
521db2b9530SVaclav Hapla       ilocal = NULL;
522db2b9530SVaclav Hapla     } else {
5239566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
524db2b9530SVaclav Hapla     }
525db2b9530SVaclav Hapla   }
526db2b9530SVaclav Hapla   sf->mine = ilocal;
527db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
52829046d53SLisandro Dalcin     sf->mine_alloc = NULL;
529db2b9530SVaclav Hapla   } else {
530db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
53195fce210SBarry Smith   }
5326497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
5336497c311SBarry Smith     PetscMPIInt size;
5346497c311SBarry Smith 
5356497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
5366497c311SBarry Smith     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
5376497c311SBarry Smith   }
538db2b9530SVaclav Hapla   sf->remote = iremote;
539db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
54029046d53SLisandro Dalcin     sf->remote_alloc = NULL;
541db2b9530SVaclav Hapla   } else {
542db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
54395fce210SBarry Smith   }
5449566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
54529046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
5463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54795fce210SBarry Smith }
54895fce210SBarry Smith 
54929046d53SLisandro Dalcin /*@
550cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
551dd5b3ca6SJunchao Zhang 
552dd5b3ca6SJunchao Zhang   Collective
553dd5b3ca6SJunchao Zhang 
554dd5b3ca6SJunchao Zhang   Input Parameters:
555cab54364SBarry Smith + sf      - The `PetscSF`
556cab54364SBarry Smith . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
557cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
558cab54364SBarry Smith 
559cab54364SBarry Smith   Level: intermediate
560dd5b3ca6SJunchao Zhang 
561dd5b3ca6SJunchao Zhang   Notes:
56220662ed9SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
56320662ed9SBarry Smith   `n` and `N` are the local and global sizes of `x` respectively.
564dd5b3ca6SJunchao Zhang 
56520662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
56620662ed9SBarry Smith   sequential vectors `y` on all MPI processes.
567dd5b3ca6SJunchao Zhang 
56820662ed9SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
56920662ed9SBarry Smith   sequential vector `y` on rank 0.
570dd5b3ca6SJunchao Zhang 
57120662ed9SBarry Smith   In above cases, entries of `x` are roots and entries of `y` are leaves.
572dd5b3ca6SJunchao Zhang 
57320662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
574dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
575cab54364SBarry Smith   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
576dd5b3ca6SJunchao Zhang   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
577cab54364SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
578dd5b3ca6SJunchao Zhang 
579dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
580dd5b3ca6SJunchao Zhang 
581cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
582dd5b3ca6SJunchao Zhang  @*/
583d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
584d71ae5a4SJacob Faibussowitsch {
585dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
586dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
587dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
588dd5b3ca6SJunchao Zhang   PetscSFType type;
589dd5b3ca6SJunchao Zhang 
590dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5912abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5924f572ea9SToby Isaac   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5939566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5942c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
597dd5b3ca6SJunchao Zhang 
598dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
599835f2295SStefano Zampini     PetscInt sizei = size;
600835f2295SStefano Zampini 
601dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
6029566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
6039566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
604835f2295SStefano Zampini     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
6059566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
606dd5b3ca6SJunchao Zhang   } else {
6079566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
6089566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
609dd5b3ca6SJunchao Zhang     res[0] = n;
610dd5b3ca6SJunchao Zhang     res[1] = -n;
611dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
612462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
613dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
614dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
615dd5b3ca6SJunchao Zhang     } else {
616dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
617dd5b3ca6SJunchao Zhang     }
6189566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
619dd5b3ca6SJunchao Zhang   }
6209566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
621dd5b3ca6SJunchao Zhang 
622dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
623dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
624dd5b3ca6SJunchao Zhang 
625dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
626dd5b3ca6SJunchao Zhang      Also set other easy stuff.
627dd5b3ca6SJunchao Zhang    */
628dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
629dd5b3ca6SJunchao Zhang     sf->nleaves = N;
630dd5b3ca6SJunchao Zhang     sf->nroots  = n;
631dd5b3ca6SJunchao Zhang     sf->nranks  = size;
632dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
633dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
634dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
635dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
636dd5b3ca6SJunchao Zhang     sf->nroots  = n;
637dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
638dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
639dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
640dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
641dd5b3ca6SJunchao Zhang     sf->nleaves = size;
642dd5b3ca6SJunchao Zhang     sf->nroots  = size;
643dd5b3ca6SJunchao Zhang     sf->nranks  = size;
644dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
645dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
646dd5b3ca6SJunchao Zhang   }
647dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
648dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
6493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
650dd5b3ca6SJunchao Zhang }
651dd5b3ca6SJunchao Zhang 
652dd5b3ca6SJunchao Zhang /*@
653cab54364SBarry Smith   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
65495fce210SBarry Smith 
65595fce210SBarry Smith   Collective
65695fce210SBarry Smith 
6574165533cSJose E. Roman   Input Parameter:
65895fce210SBarry Smith . sf - star forest to invert
65995fce210SBarry Smith 
6604165533cSJose E. Roman   Output Parameter:
66120662ed9SBarry Smith . isf - inverse of `sf`
6624165533cSJose E. Roman 
66395fce210SBarry Smith   Level: advanced
66495fce210SBarry Smith 
66595fce210SBarry Smith   Notes:
66695fce210SBarry Smith   All roots must have degree 1.
66795fce210SBarry Smith 
66895fce210SBarry Smith   The local space may be a permutation, but cannot be sparse.
66995fce210SBarry Smith 
67020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
67195fce210SBarry Smith @*/
672d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
673d71ae5a4SJacob Faibussowitsch {
67495fce210SBarry Smith   PetscMPIInt     rank;
67595fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
67695fce210SBarry Smith   const PetscInt *ilocal;
67795fce210SBarry Smith   PetscSFNode    *roots, *leaves;
67895fce210SBarry Smith 
67995fce210SBarry Smith   PetscFunctionBegin;
68029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
68129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
6824f572ea9SToby Isaac   PetscAssertPointer(isf, 2);
68329046d53SLisandro Dalcin 
6849566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
68529046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
68629046d53SLisandro Dalcin 
6879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
689ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
69095fce210SBarry Smith     leaves[i].rank  = rank;
69195fce210SBarry Smith     leaves[i].index = i;
69295fce210SBarry Smith   }
69395fce210SBarry Smith   for (i = 0; i < nroots; i++) {
69495fce210SBarry Smith     roots[i].rank  = -1;
69595fce210SBarry Smith     roots[i].index = -1;
69695fce210SBarry Smith   }
6976497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
6986497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
69995fce210SBarry Smith 
70095fce210SBarry Smith   /* Check whether our leaves are sparse */
7019371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
7029371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
70395fce210SBarry Smith   if (count == nroots) newilocal = NULL;
7049371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
70595fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
70695fce210SBarry Smith       if (roots[i].rank >= 0) {
70795fce210SBarry Smith         newilocal[count]   = i;
70895fce210SBarry Smith         roots[count].rank  = roots[i].rank;
70995fce210SBarry Smith         roots[count].index = roots[i].index;
71095fce210SBarry Smith         count++;
71195fce210SBarry Smith       }
71295fce210SBarry Smith     }
71395fce210SBarry Smith   }
71495fce210SBarry Smith 
7159566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7169566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7179566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
7183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
71995fce210SBarry Smith }
72095fce210SBarry Smith 
72195fce210SBarry Smith /*@
722cab54364SBarry Smith   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
72395fce210SBarry Smith 
72495fce210SBarry Smith   Collective
72595fce210SBarry Smith 
7264165533cSJose E. Roman   Input Parameters:
72795fce210SBarry Smith + sf  - communication object to duplicate
728cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
72995fce210SBarry Smith 
7304165533cSJose E. Roman   Output Parameter:
73195fce210SBarry Smith . newsf - new communication object
73295fce210SBarry Smith 
73395fce210SBarry Smith   Level: beginner
73495fce210SBarry Smith 
73520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
73695fce210SBarry Smith @*/
737d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
738d71ae5a4SJacob Faibussowitsch {
73929046d53SLisandro Dalcin   PetscSFType  type;
74097929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
74195fce210SBarry Smith 
74295fce210SBarry Smith   PetscFunctionBegin;
74329046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
74429046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
7454f572ea9SToby Isaac   PetscAssertPointer(newsf, 3);
7469566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7479566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7489566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
74935cb6cd3SPierre Jolivet   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
75095fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
751dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
752dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
75395fce210SBarry Smith       PetscInt           nroots, nleaves;
75495fce210SBarry Smith       const PetscInt    *ilocal;
75595fce210SBarry Smith       const PetscSFNode *iremote;
7569566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7579566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
758dd5b3ca6SJunchao Zhang     } else {
7599566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
760dd5b3ca6SJunchao Zhang     }
76195fce210SBarry Smith   }
76297929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7639566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
76497929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
76597929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
76697929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
76797929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
76897929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
76997929ea7SJunchao Zhang 
77020c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
77120c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
77271438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
77320c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
77420c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
77520c24465SJunchao Zhang #endif
776dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
77720c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
77995fce210SBarry Smith }
78095fce210SBarry Smith 
78195fce210SBarry Smith /*@C
78295fce210SBarry Smith   PetscSFGetGraph - Get the graph specifying a parallel star forest
78395fce210SBarry Smith 
78495fce210SBarry Smith   Not Collective
78595fce210SBarry Smith 
7864165533cSJose E. Roman   Input Parameter:
78795fce210SBarry Smith . sf - star forest
78895fce210SBarry Smith 
7894165533cSJose E. Roman   Output Parameters:
79095fce210SBarry Smith + nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
79195fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process
79220662ed9SBarry Smith . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
79395fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
79495fce210SBarry Smith 
795cab54364SBarry Smith   Level: intermediate
796cab54364SBarry Smith 
797373e0d91SLisandro Dalcin   Notes:
79820662ed9SBarry Smith   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
799373e0d91SLisandro Dalcin 
80020662ed9SBarry Smith   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
801db2b9530SVaclav Hapla 
8028dbb0df6SBarry Smith   Fortran Notes:
80320662ed9SBarry Smith   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
80420662ed9SBarry Smith   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
8058dbb0df6SBarry Smith 
80620662ed9SBarry Smith   To check for a `NULL` `ilocal` use
8078dbb0df6SBarry Smith $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
808ca797d7aSLawrence Mitchell 
80920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
81095fce210SBarry Smith @*/
811d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
812d71ae5a4SJacob Faibussowitsch {
81395fce210SBarry Smith   PetscFunctionBegin;
81495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
815b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
816f4f49eeaSPierre Jolivet     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
817b8dee149SJunchao Zhang   } else {
81895fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
81995fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
82095fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
82195fce210SBarry Smith     if (iremote) *iremote = sf->remote;
822b8dee149SJunchao Zhang   }
8233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
82495fce210SBarry Smith }
82595fce210SBarry Smith 
82629046d53SLisandro Dalcin /*@
82795fce210SBarry Smith   PetscSFGetLeafRange - Get the active leaf ranges
82895fce210SBarry Smith 
82995fce210SBarry Smith   Not Collective
83095fce210SBarry Smith 
8314165533cSJose E. Roman   Input Parameter:
83295fce210SBarry Smith . sf - star forest
83395fce210SBarry Smith 
8344165533cSJose E. Roman   Output Parameters:
83520662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
83620662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
83795fce210SBarry Smith 
83895fce210SBarry Smith   Level: developer
83995fce210SBarry Smith 
84020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
84195fce210SBarry Smith @*/
842d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
843d71ae5a4SJacob Faibussowitsch {
84495fce210SBarry Smith   PetscFunctionBegin;
84595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
84629046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
84795fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
84895fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
8493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
85095fce210SBarry Smith }
85195fce210SBarry Smith 
852ffeef943SBarry Smith /*@
853cab54364SBarry Smith   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
854fe2efc57SMark 
85520f4b53cSBarry Smith   Collective
856fe2efc57SMark 
857fe2efc57SMark   Input Parameters:
858fe2efc57SMark + A    - the star forest
859cab54364SBarry Smith . obj  - Optional object that provides the prefix for the option names
860736c3998SJose E. Roman - name - command line option
861fe2efc57SMark 
862fe2efc57SMark   Level: intermediate
863cab54364SBarry Smith 
86420662ed9SBarry Smith   Note:
86520662ed9SBarry Smith   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
86620662ed9SBarry Smith 
867db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
868fe2efc57SMark @*/
869d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
870d71ae5a4SJacob Faibussowitsch {
871fe2efc57SMark   PetscFunctionBegin;
872fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8739566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
875fe2efc57SMark }
876fe2efc57SMark 
877ffeef943SBarry Smith /*@
87895fce210SBarry Smith   PetscSFView - view a star forest
87995fce210SBarry Smith 
88095fce210SBarry Smith   Collective
88195fce210SBarry Smith 
8824165533cSJose E. Roman   Input Parameters:
88395fce210SBarry Smith + sf     - star forest
884cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
88595fce210SBarry Smith 
88695fce210SBarry Smith   Level: beginner
88795fce210SBarry Smith 
888cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
88995fce210SBarry Smith @*/
890d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
891d71ae5a4SJacob Faibussowitsch {
89295fce210SBarry Smith   PetscBool         iascii;
89395fce210SBarry Smith   PetscViewerFormat format;
89495fce210SBarry Smith 
89595fce210SBarry Smith   PetscFunctionBegin;
89695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8979566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
89895fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
89995fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
9009566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
9019566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
90253dd6d7dSJunchao Zhang   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
90395fce210SBarry Smith     PetscMPIInt rank;
9046497c311SBarry Smith     PetscInt    j;
90595fce210SBarry Smith 
9069566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
9079566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
908dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
90980153354SVaclav Hapla       if (!sf->graphset) {
9109566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
9119566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
9123ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
91380153354SVaclav Hapla       }
9149566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9159566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9166497c311SBarry Smith       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
917835f2295SStefano Zampini       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
9189566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9199566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
92095fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
92181bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
9226497c311SBarry Smith 
9239566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9249566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
9256497c311SBarry Smith         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
9269566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9279566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
9286497c311SBarry Smith         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
9296497c311SBarry Smith           PetscMPIInt i = perm[ii];
9306497c311SBarry Smith 
9319566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
93248a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
93395fce210SBarry Smith         }
9349566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
93595fce210SBarry Smith       }
9369566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9379566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
938dd5b3ca6SJunchao Zhang     }
9399566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
94095fce210SBarry Smith   }
941dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
9423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
94395fce210SBarry Smith }
94495fce210SBarry Smith 
94595fce210SBarry Smith /*@C
946dec1416fSJunchao Zhang   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
94795fce210SBarry Smith 
94895fce210SBarry Smith   Not Collective
94995fce210SBarry Smith 
9504165533cSJose E. Roman   Input Parameter:
95195fce210SBarry Smith . sf - star forest
95295fce210SBarry Smith 
9534165533cSJose E. Roman   Output Parameters:
95495fce210SBarry Smith + nranks  - number of ranks referenced by local part
95520662ed9SBarry Smith . ranks   - [`nranks`] array of ranks
95620662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
9576497c311SBarry Smith . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
9586497c311SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
95995fce210SBarry Smith 
96095fce210SBarry Smith   Level: developer
96195fce210SBarry Smith 
962cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
96395fce210SBarry Smith @*/
964*9c9354e5SBarry Smith PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
965d71ae5a4SJacob Faibussowitsch {
96695fce210SBarry Smith   PetscFunctionBegin;
96795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
96828b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
969dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9709927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
971dec1416fSJunchao Zhang   } else {
972dec1416fSJunchao Zhang     /* The generic implementation */
97395fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
97495fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
97595fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
97695fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
97795fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
978dec1416fSJunchao Zhang   }
9793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
98095fce210SBarry Smith }
98195fce210SBarry Smith 
9828750ddebSJunchao Zhang /*@C
9838750ddebSJunchao Zhang   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9848750ddebSJunchao Zhang 
9858750ddebSJunchao Zhang   Not Collective
9868750ddebSJunchao Zhang 
9874165533cSJose E. Roman   Input Parameter:
9888750ddebSJunchao Zhang . sf - star forest
9898750ddebSJunchao Zhang 
9904165533cSJose E. Roman   Output Parameters:
9918750ddebSJunchao Zhang + niranks  - number of leaf ranks referencing roots on this process
99220662ed9SBarry Smith . iranks   - [`niranks`] array of ranks
99320662ed9SBarry Smith . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
99420662ed9SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
9958750ddebSJunchao Zhang 
9968750ddebSJunchao Zhang   Level: developer
9978750ddebSJunchao Zhang 
998cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
9998750ddebSJunchao Zhang @*/
1000*9c9354e5SBarry Smith PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
1001d71ae5a4SJacob Faibussowitsch {
10028750ddebSJunchao Zhang   PetscFunctionBegin;
10038750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
100428b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
10058750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
10069927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
10078750ddebSJunchao Zhang   } else {
10088750ddebSJunchao Zhang     PetscSFType type;
10099566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
101098921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
10118750ddebSJunchao Zhang   }
10123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10138750ddebSJunchao Zhang }
10148750ddebSJunchao Zhang 
1015d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1016d71ae5a4SJacob Faibussowitsch {
1017b5a8e515SJed Brown   PetscInt i;
1018b5a8e515SJed Brown   for (i = 0; i < n; i++) {
1019b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
1020b5a8e515SJed Brown   }
1021b5a8e515SJed Brown   return PETSC_FALSE;
1022b5a8e515SJed Brown }
1023b5a8e515SJed Brown 
102495fce210SBarry Smith /*@C
1025cab54364SBarry Smith   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
102621c688dcSJed Brown 
102721c688dcSJed Brown   Collective
102821c688dcSJed Brown 
10294165533cSJose E. Roman   Input Parameters:
1030cab54364SBarry Smith + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1031cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
103221c688dcSJed Brown 
103321c688dcSJed Brown   Level: developer
103421c688dcSJed Brown 
1035cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
103621c688dcSJed Brown @*/
1037d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1038d71ae5a4SJacob Faibussowitsch {
1039eec179cfSJacob Faibussowitsch   PetscHMapI    table;
1040eec179cfSJacob Faibussowitsch   PetscHashIter pos;
10416497c311SBarry Smith   PetscMPIInt   size, groupsize, *groupranks, *ranks;
10426497c311SBarry Smith   PetscInt     *rcount;
10436497c311SBarry Smith   PetscInt      irank, sfnrank, ranksi;
10446497c311SBarry Smith   PetscMPIInt   i, orank = -1;
104521c688dcSJed Brown 
104621c688dcSJed Brown   PetscFunctionBegin;
104721c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
104829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10499566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1050eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(10, &table));
105121c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
105221c688dcSJed Brown     /* Log 1-based rank */
1053eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
105421c688dcSJed Brown   }
10556497c311SBarry Smith   PetscCall(PetscHMapIGetSize(table, &sfnrank));
10566497c311SBarry Smith   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
10579566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1059eec179cfSJacob Faibussowitsch   PetscHashIterBegin(table, pos);
106021c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10616497c311SBarry Smith     PetscHashIterGetKey(table, pos, ranksi);
10626497c311SBarry Smith     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1063eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(table, pos, rcount[i]);
1064eec179cfSJacob Faibussowitsch     PetscHashIterNext(table, pos);
106521c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
106621c688dcSJed Brown   }
1067eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&table));
1068b5a8e515SJed Brown 
1069b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1070b5a8e515SJed Brown   {
10717fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1072b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10736497c311SBarry Smith 
10749566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10759566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10779566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1078b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10799566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10809566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10819566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1082b5a8e515SJed Brown   }
1083b5a8e515SJed Brown 
1084b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1085b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1086b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1087b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1088b5a8e515SJed Brown     }
1089b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1090b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1091b5a8e515SJed Brown     }
1092b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
10936497c311SBarry Smith       PetscMPIInt tmprank;
10946497c311SBarry Smith       PetscInt    tmpcount;
1095247e8311SStefano Zampini 
1096b5a8e515SJed Brown       tmprank             = ranks[i];
1097b5a8e515SJed Brown       tmpcount            = rcount[i];
1098b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1099b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1100b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1101b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1102b5a8e515SJed Brown       sf->ndranks++;
1103b5a8e515SJed Brown     }
1104b5a8e515SJed Brown   }
11059566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
11066497c311SBarry Smith   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
11076497c311SBarry Smith   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
110821c688dcSJed Brown   sf->roffset[0] = 0;
110921c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
11109566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
111121c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
111221c688dcSJed Brown     rcount[i]          = 0;
111321c688dcSJed Brown   }
1114247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1115247e8311SStefano Zampini     /* short circuit */
1116247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
111721c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
1118835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1119835f2295SStefano Zampini       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1120b5a8e515SJed Brown       if (irank < 0) {
1121835f2295SStefano Zampini         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1122b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
112321c688dcSJed Brown       }
1124247e8311SStefano Zampini     }
1125835f2295SStefano Zampini     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
112621c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
112721c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
112821c688dcSJed Brown     rcount[irank]++;
112921c688dcSJed Brown   }
11309566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
11313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
113221c688dcSJed Brown }
113321c688dcSJed Brown 
113421c688dcSJed Brown /*@C
113595fce210SBarry Smith   PetscSFGetGroups - gets incoming and outgoing process groups
113695fce210SBarry Smith 
113795fce210SBarry Smith   Collective
113895fce210SBarry Smith 
11394165533cSJose E. Roman   Input Parameter:
114095fce210SBarry Smith . sf - star forest
114195fce210SBarry Smith 
11424165533cSJose E. Roman   Output Parameters:
114395fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
114495fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
114595fce210SBarry Smith 
114695fce210SBarry Smith   Level: developer
114795fce210SBarry Smith 
1148cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
114995fce210SBarry Smith @*/
1150d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1151d71ae5a4SJacob Faibussowitsch {
11527fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
115395fce210SBarry Smith 
115495fce210SBarry Smith   PetscFunctionBegin;
115508401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
115695fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
115795fce210SBarry Smith     PetscInt        i;
115895fce210SBarry Smith     const PetscInt *indegree;
11596497c311SBarry Smith     PetscMPIInt     rank, *outranks, *inranks, indegree0;
116095fce210SBarry Smith     PetscSFNode    *remote;
116195fce210SBarry Smith     PetscSF         bgcount;
116295fce210SBarry Smith 
116395fce210SBarry Smith     /* Compute the number of incoming ranks */
11649566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
116595fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
116695fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
116795fce210SBarry Smith       remote[i].index = 0;
116895fce210SBarry Smith     }
11699566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11709566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11719566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11729566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
117395fce210SBarry Smith     /* Enumerate the incoming ranks */
11749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11759566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
117695fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11779566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11789566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11799566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11806497c311SBarry Smith     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
11816497c311SBarry Smith     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
11829566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11839566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11849566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
118595fce210SBarry Smith   }
118695fce210SBarry Smith   *incoming = sf->ingroup;
118795fce210SBarry Smith 
118895fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11899566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11909566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11919566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
119295fce210SBarry Smith   }
119395fce210SBarry Smith   *outgoing = sf->outgroup;
11943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119595fce210SBarry Smith }
119695fce210SBarry Smith 
119729046d53SLisandro Dalcin /*@
11980dd791a8SStefano Zampini   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11990dd791a8SStefano Zampini 
12000dd791a8SStefano Zampini   Collective
12010dd791a8SStefano Zampini 
12020dd791a8SStefano Zampini   Input Parameter:
12030dd791a8SStefano Zampini . sf - star forest
12040dd791a8SStefano Zampini 
12050dd791a8SStefano Zampini   Output Parameter:
12060dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications
12070dd791a8SStefano Zampini 
12080dd791a8SStefano Zampini   Level: developer
12090dd791a8SStefano Zampini 
12100dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
12110dd791a8SStefano Zampini @*/
12120dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
12130dd791a8SStefano Zampini {
12140dd791a8SStefano Zampini   PetscFunctionBegin;
12150dd791a8SStefano Zampini   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12160dd791a8SStefano Zampini   PetscAssertPointer(rsf, 2);
12170dd791a8SStefano Zampini   if (!sf->rankssf) {
12180dd791a8SStefano Zampini     PetscSFNode       *rremotes;
12190dd791a8SStefano Zampini     const PetscMPIInt *ranks;
12206497c311SBarry Smith     PetscMPIInt        nranks;
12210dd791a8SStefano Zampini 
12220dd791a8SStefano Zampini     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12230dd791a8SStefano Zampini     PetscCall(PetscMalloc1(nranks, &rremotes));
12240dd791a8SStefano Zampini     for (PetscInt i = 0; i < nranks; i++) {
12250dd791a8SStefano Zampini       rremotes[i].rank  = ranks[i];
12260dd791a8SStefano Zampini       rremotes[i].index = 0;
12270dd791a8SStefano Zampini     }
12280dd791a8SStefano Zampini     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12290dd791a8SStefano Zampini     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12300dd791a8SStefano Zampini   }
12310dd791a8SStefano Zampini   *rsf = sf->rankssf;
12320dd791a8SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
12330dd791a8SStefano Zampini }
12340dd791a8SStefano Zampini 
12350dd791a8SStefano Zampini /*@
1236cab54364SBarry Smith   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
123795fce210SBarry Smith 
123895fce210SBarry Smith   Collective
123995fce210SBarry Smith 
12404165533cSJose E. Roman   Input Parameter:
124195fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
124295fce210SBarry Smith 
12434165533cSJose E. Roman   Output Parameter:
124495fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1
124595fce210SBarry Smith 
124695fce210SBarry Smith   Level: developer
124795fce210SBarry Smith 
1248cab54364SBarry Smith   Note:
1249cab54364SBarry Smith   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
125095fce210SBarry Smith   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
125195fce210SBarry Smith   edge, it is a candidate for future optimization that might involve its removal.
125295fce210SBarry Smith 
1253cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
125495fce210SBarry Smith @*/
1255d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1256d71ae5a4SJacob Faibussowitsch {
125795fce210SBarry Smith   PetscFunctionBegin;
125895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12594f572ea9SToby Isaac   PetscAssertPointer(multi, 2);
126095fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12619566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
126295fce210SBarry Smith     *multi           = sf->multi;
1263013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12643ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
126595fce210SBarry Smith   }
126695fce210SBarry Smith   if (!sf->multi) {
126795fce210SBarry Smith     const PetscInt *indegree;
12689837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
126995fce210SBarry Smith     PetscSFNode    *remote;
127029046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12719566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12729566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
127495fce210SBarry Smith     inoffset[0] = 0;
127595fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12769837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
12779566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12789566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
127995fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
128076bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1281ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
128276bd3646SJed Brown     }
12839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
128495fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
128595fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
128638e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
128795fce210SBarry Smith     }
12889566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1289013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12909566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
129195fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
129295fce210SBarry Smith       PetscMPIInt  rank;
129395fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
129495fce210SBarry Smith       PetscSFNode *newremote;
12959566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
129695fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12979566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12989837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12999566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
13009566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
130195fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
130295fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
130395fce210SBarry Smith         PetscInt j;
130495fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
13058e3a54c0SPierre Jolivet         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
130695fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
130795fce210SBarry Smith       }
13089566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13099566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13109566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
131195fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
131295fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
131301365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
131495fce210SBarry Smith       }
13159566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
13169566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
131795fce210SBarry Smith     }
13189566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
131995fce210SBarry Smith   }
132095fce210SBarry Smith   *multi = sf->multi;
13213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
132295fce210SBarry Smith }
132395fce210SBarry Smith 
132495fce210SBarry Smith /*@C
132520662ed9SBarry Smith   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
132695fce210SBarry Smith 
132795fce210SBarry Smith   Collective
132895fce210SBarry Smith 
13294165533cSJose E. Roman   Input Parameters:
133095fce210SBarry Smith + sf        - original star forest
1331ba2a7774SJunchao Zhang . nselected - number of selected roots on this process
1332ba2a7774SJunchao Zhang - selected  - indices of the selected roots on this process
133395fce210SBarry Smith 
13344165533cSJose E. Roman   Output Parameter:
1335cd620004SJunchao Zhang . esf - new star forest
133695fce210SBarry Smith 
133795fce210SBarry Smith   Level: advanced
133895fce210SBarry Smith 
133995fce210SBarry Smith   Note:
1340cab54364SBarry Smith   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
134195fce210SBarry Smith   be done by calling PetscSFGetGraph().
134295fce210SBarry Smith 
1343cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
134495fce210SBarry Smith @*/
1345d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1346d71ae5a4SJacob Faibussowitsch {
1347cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1348cd620004SJunchao Zhang   const PetscInt    *ilocal;
1349cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1350ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1351f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1352f659e5c7SJunchao Zhang   MPI_Comm           comm;
135395fce210SBarry Smith 
135495fce210SBarry Smith   PetscFunctionBegin;
135595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
135629046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
13574f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
13584f572ea9SToby Isaac   PetscAssertPointer(esf, 4);
13590511a646SMatthew G. Knepley 
13609566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
13619566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13629566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13639566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1364cd620004SJunchao Zhang 
136576bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1366cd620004SJunchao Zhang     PetscBool dups;
13679566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
136828b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1369511e6246SStefano Zampini     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1370cd620004SJunchao Zhang   }
1371f659e5c7SJunchao Zhang 
1372dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1373dbbe0bcdSBarry Smith   else {
1374cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
13759566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1376cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
13779566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13788e3a54c0SPierre Jolivet     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1379cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1380cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13819566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13829566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1383ba2a7774SJunchao Zhang 
1384cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1385cd620004SJunchao Zhang     esf_nleaves = 0;
1386cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1387cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1388cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1389cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1390cd620004SJunchao Zhang       */
1391cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1392cd620004SJunchao Zhang     }
13939566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13949566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1395cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1396cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1397cd620004SJunchao Zhang       if (leafdata[j]) {
1398cd620004SJunchao Zhang         new_ilocal[n]        = j;
1399cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1400cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1401fc1ede2bSMatthew G. Knepley         ++n;
140295fce210SBarry Smith       }
140395fce210SBarry Smith     }
14049566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
14059566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
14069566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
14079566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1408f659e5c7SJunchao Zhang   }
14099566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
14103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
141195fce210SBarry Smith }
141295fce210SBarry Smith 
14132f5fb4c2SMatthew G. Knepley /*@C
141420662ed9SBarry Smith   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
14152f5fb4c2SMatthew G. Knepley 
14162f5fb4c2SMatthew G. Knepley   Collective
14172f5fb4c2SMatthew G. Knepley 
14184165533cSJose E. Roman   Input Parameters:
14192f5fb4c2SMatthew G. Knepley + sf        - original star forest
1420f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process
1421f659e5c7SJunchao Zhang - selected  - indices of the selected leaves on this process
14222f5fb4c2SMatthew G. Knepley 
14234165533cSJose E. Roman   Output Parameter:
14242f5fb4c2SMatthew G. Knepley . newsf - new star forest
14252f5fb4c2SMatthew G. Knepley 
14262f5fb4c2SMatthew G. Knepley   Level: advanced
14272f5fb4c2SMatthew G. Knepley 
1428cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14292f5fb4c2SMatthew G. Knepley @*/
1430d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1431d71ae5a4SJacob Faibussowitsch {
1432f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1433f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1434f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1435f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1436f659e5c7SJunchao Zhang   MPI_Comm           comm;
14372f5fb4c2SMatthew G. Knepley 
14382f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
14392f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
144029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
14414f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
14424f572ea9SToby Isaac   PetscAssertPointer(newsf, 4);
14432f5fb4c2SMatthew G. Knepley 
1444f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
14459566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
14479566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
14489566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
144908401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1450f659e5c7SJunchao Zhang 
1451f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1452dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1453dbbe0bcdSBarry Smith   else {
14549566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14559566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
14569566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1457f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1458f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1459f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1460f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1461f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
14622f5fb4c2SMatthew G. Knepley     }
14639566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14649566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1465f659e5c7SJunchao Zhang   }
14669566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
14673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14682f5fb4c2SMatthew G. Knepley }
14692f5fb4c2SMatthew G. Knepley 
147095fce210SBarry Smith /*@C
1471cab54364SBarry Smith   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14723482bfa8SJunchao Zhang 
1473c3339decSBarry Smith   Collective
14743482bfa8SJunchao Zhang 
14754165533cSJose E. Roman   Input Parameters:
14763482bfa8SJunchao Zhang + sf       - star forest on which to communicate
14773482bfa8SJunchao Zhang . unit     - data type associated with each node
14783482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14793482bfa8SJunchao Zhang - op       - operation to use for reduction
14803482bfa8SJunchao Zhang 
14814165533cSJose E. Roman   Output Parameter:
14823482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14833482bfa8SJunchao Zhang 
14843482bfa8SJunchao Zhang   Level: intermediate
14853482bfa8SJunchao Zhang 
148620662ed9SBarry Smith   Note:
148720662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1488da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1489cab54364SBarry Smith   use `PetscSFBcastWithMemTypeBegin()` instead.
1490cab54364SBarry Smith 
1491cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14923482bfa8SJunchao Zhang @*/
1493d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1494d71ae5a4SJacob Faibussowitsch {
1495eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14963482bfa8SJunchao Zhang 
14973482bfa8SJunchao Zhang   PetscFunctionBegin;
14983482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14999566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15009566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
15019566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15029566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1503dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15049566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15063482bfa8SJunchao Zhang }
15073482bfa8SJunchao Zhang 
15083482bfa8SJunchao Zhang /*@C
150920662ed9SBarry Smith   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
151020662ed9SBarry Smith   to `PetscSFBcastEnd()`
1511d0295fc0SJunchao Zhang 
1512c3339decSBarry Smith   Collective
1513d0295fc0SJunchao Zhang 
15144165533cSJose E. Roman   Input Parameters:
1515d0295fc0SJunchao Zhang + sf        - star forest on which to communicate
1516d0295fc0SJunchao Zhang . unit      - data type associated with each node
1517d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1518d0295fc0SJunchao Zhang . rootdata  - buffer to broadcast
1519d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1520d0295fc0SJunchao Zhang - op        - operation to use for reduction
1521d0295fc0SJunchao Zhang 
15224165533cSJose E. Roman   Output Parameter:
1523d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1524d0295fc0SJunchao Zhang 
1525d0295fc0SJunchao Zhang   Level: intermediate
1526d0295fc0SJunchao Zhang 
1527cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1528d0295fc0SJunchao Zhang @*/
1529d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1530d71ae5a4SJacob Faibussowitsch {
1531d0295fc0SJunchao Zhang   PetscFunctionBegin;
1532d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15339566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15349566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1535dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15369566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1538d0295fc0SJunchao Zhang }
1539d0295fc0SJunchao Zhang 
1540d0295fc0SJunchao Zhang /*@C
154120662ed9SBarry Smith   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15423482bfa8SJunchao Zhang 
15433482bfa8SJunchao Zhang   Collective
15443482bfa8SJunchao Zhang 
15454165533cSJose E. Roman   Input Parameters:
15463482bfa8SJunchao Zhang + sf       - star forest
15473482bfa8SJunchao Zhang . unit     - data type
15483482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15493482bfa8SJunchao Zhang - op       - operation to use for reduction
15503482bfa8SJunchao Zhang 
15514165533cSJose E. Roman   Output Parameter:
15523482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15533482bfa8SJunchao Zhang 
15543482bfa8SJunchao Zhang   Level: intermediate
15553482bfa8SJunchao Zhang 
1556cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15573482bfa8SJunchao Zhang @*/
1558d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1559d71ae5a4SJacob Faibussowitsch {
15603482bfa8SJunchao Zhang   PetscFunctionBegin;
15613482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15629566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1563dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15649566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15663482bfa8SJunchao Zhang }
15673482bfa8SJunchao Zhang 
15683482bfa8SJunchao Zhang /*@C
1569cab54364SBarry Smith   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
157095fce210SBarry Smith 
157195fce210SBarry Smith   Collective
157295fce210SBarry Smith 
15734165533cSJose E. Roman   Input Parameters:
157495fce210SBarry Smith + sf       - star forest
157595fce210SBarry Smith . unit     - data type
157695fce210SBarry Smith . leafdata - values to reduce
157795fce210SBarry Smith - op       - reduction operation
157895fce210SBarry Smith 
15794165533cSJose E. Roman   Output Parameter:
158095fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
158195fce210SBarry Smith 
158295fce210SBarry Smith   Level: intermediate
158395fce210SBarry Smith 
158420662ed9SBarry Smith   Note:
158520662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1586da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1587cab54364SBarry Smith   use `PetscSFReduceWithMemTypeBegin()` instead.
1588d0295fc0SJunchao Zhang 
158920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
159095fce210SBarry Smith @*/
1591d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1592d71ae5a4SJacob Faibussowitsch {
1593eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
159495fce210SBarry Smith 
159595fce210SBarry Smith   PetscFunctionBegin;
159695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15979566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15989566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15999566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16009566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1601f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16029566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
160495fce210SBarry Smith }
160595fce210SBarry Smith 
160695fce210SBarry Smith /*@C
1607cab54364SBarry Smith   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1608d0295fc0SJunchao Zhang 
1609d0295fc0SJunchao Zhang   Collective
1610d0295fc0SJunchao Zhang 
16114165533cSJose E. Roman   Input Parameters:
1612d0295fc0SJunchao Zhang + sf        - star forest
1613d0295fc0SJunchao Zhang . unit      - data type
1614d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1615d0295fc0SJunchao Zhang . leafdata  - values to reduce
1616d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1617d0295fc0SJunchao Zhang - op        - reduction operation
1618d0295fc0SJunchao Zhang 
16194165533cSJose E. Roman   Output Parameter:
1620d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1621d0295fc0SJunchao Zhang 
1622d0295fc0SJunchao Zhang   Level: intermediate
1623d0295fc0SJunchao Zhang 
162420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1625d0295fc0SJunchao Zhang @*/
1626d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1627d71ae5a4SJacob Faibussowitsch {
1628d0295fc0SJunchao Zhang   PetscFunctionBegin;
1629d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16309566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16319566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1632f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16339566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1635d0295fc0SJunchao Zhang }
1636d0295fc0SJunchao Zhang 
1637d0295fc0SJunchao Zhang /*@C
163820662ed9SBarry Smith   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
163995fce210SBarry Smith 
164095fce210SBarry Smith   Collective
164195fce210SBarry Smith 
16424165533cSJose E. Roman   Input Parameters:
164395fce210SBarry Smith + sf       - star forest
164495fce210SBarry Smith . unit     - data type
164595fce210SBarry Smith . leafdata - values to reduce
164695fce210SBarry Smith - op       - reduction operation
164795fce210SBarry Smith 
16484165533cSJose E. Roman   Output Parameter:
164995fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
165095fce210SBarry Smith 
165195fce210SBarry Smith   Level: intermediate
165295fce210SBarry Smith 
165320662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
165495fce210SBarry Smith @*/
1655d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1656d71ae5a4SJacob Faibussowitsch {
165795fce210SBarry Smith   PetscFunctionBegin;
165895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16599566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1660dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16619566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
166395fce210SBarry Smith }
166495fce210SBarry Smith 
166595fce210SBarry Smith /*@C
1666cab54364SBarry Smith   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1667cab54364SBarry Smith   to be completed with `PetscSFFetchAndOpEnd()`
1668a1729e3fSJunchao Zhang 
1669a1729e3fSJunchao Zhang   Collective
1670a1729e3fSJunchao Zhang 
16714165533cSJose E. Roman   Input Parameters:
1672a1729e3fSJunchao Zhang + sf       - star forest
1673a1729e3fSJunchao Zhang . unit     - data type
1674a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1675a1729e3fSJunchao Zhang - op       - operation to use for reduction
1676a1729e3fSJunchao Zhang 
16774165533cSJose E. Roman   Output Parameters:
1678a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1679a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1680a1729e3fSJunchao Zhang 
1681a1729e3fSJunchao Zhang   Level: advanced
1682a1729e3fSJunchao Zhang 
1683a1729e3fSJunchao Zhang   Note:
1684a1729e3fSJunchao Zhang   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1685a1729e3fSJunchao Zhang   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1686a1729e3fSJunchao Zhang   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1687a1729e3fSJunchao Zhang   integers.
1688a1729e3fSJunchao Zhang 
1689cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1690a1729e3fSJunchao Zhang @*/
1691d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1692d71ae5a4SJacob Faibussowitsch {
1693eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1694a1729e3fSJunchao Zhang 
1695a1729e3fSJunchao Zhang   PetscFunctionBegin;
1696a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16979566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16989566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16999566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
17009566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
17019566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
170208401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1703dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17049566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1706a1729e3fSJunchao Zhang }
1707a1729e3fSJunchao Zhang 
1708a1729e3fSJunchao Zhang /*@C
1709cab54364SBarry Smith   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1710cab54364SBarry Smith   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1711d3b3e55cSJunchao Zhang 
1712d3b3e55cSJunchao Zhang   Collective
1713d3b3e55cSJunchao Zhang 
1714d3b3e55cSJunchao Zhang   Input Parameters:
1715d3b3e55cSJunchao Zhang + sf              - star forest
1716d3b3e55cSJunchao Zhang . unit            - data type
1717d3b3e55cSJunchao Zhang . rootmtype       - memory type of rootdata
1718d3b3e55cSJunchao Zhang . leafmtype       - memory type of leafdata
1719d3b3e55cSJunchao Zhang . leafdata        - leaf values to use in reduction
1720d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate
1721d3b3e55cSJunchao Zhang - op              - operation to use for reduction
1722d3b3e55cSJunchao Zhang 
1723d3b3e55cSJunchao Zhang   Output Parameters:
1724d3b3e55cSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1725d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1726d3b3e55cSJunchao Zhang 
1727d3b3e55cSJunchao Zhang   Level: advanced
1728d3b3e55cSJunchao Zhang 
1729cab54364SBarry Smith   Note:
1730cab54364SBarry Smith   See `PetscSFFetchAndOpBegin()` for more details.
1731d3b3e55cSJunchao Zhang 
173220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1733d3b3e55cSJunchao Zhang @*/
1734d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1735d71ae5a4SJacob Faibussowitsch {
1736d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1737d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17389566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
17399566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
174008401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1741dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17429566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1744d3b3e55cSJunchao Zhang }
1745d3b3e55cSJunchao Zhang 
1746d3b3e55cSJunchao Zhang /*@C
174720662ed9SBarry Smith   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
174820662ed9SBarry Smith   to fetch values from roots and update atomically by applying operation using my leaf value
1749a1729e3fSJunchao Zhang 
1750a1729e3fSJunchao Zhang   Collective
1751a1729e3fSJunchao Zhang 
17524165533cSJose E. Roman   Input Parameters:
1753a1729e3fSJunchao Zhang + sf       - star forest
1754a1729e3fSJunchao Zhang . unit     - data type
1755a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1756a1729e3fSJunchao Zhang - op       - operation to use for reduction
1757a1729e3fSJunchao Zhang 
17584165533cSJose E. Roman   Output Parameters:
1759a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1760a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1761a1729e3fSJunchao Zhang 
1762a1729e3fSJunchao Zhang   Level: advanced
1763a1729e3fSJunchao Zhang 
176420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1765a1729e3fSJunchao Zhang @*/
1766d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1767d71ae5a4SJacob Faibussowitsch {
1768a1729e3fSJunchao Zhang   PetscFunctionBegin;
1769a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17709566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1771dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17729566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1774a1729e3fSJunchao Zhang }
1775a1729e3fSJunchao Zhang 
1776a1729e3fSJunchao Zhang /*@C
1777cab54364SBarry Smith   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
177895fce210SBarry Smith 
177995fce210SBarry Smith   Collective
178095fce210SBarry Smith 
17814165533cSJose E. Roman   Input Parameter:
178295fce210SBarry Smith . sf - star forest
178395fce210SBarry Smith 
17844165533cSJose E. Roman   Output Parameter:
178595fce210SBarry Smith . degree - degree of each root vertex
178695fce210SBarry Smith 
178795fce210SBarry Smith   Level: advanced
178895fce210SBarry Smith 
1789cab54364SBarry Smith   Note:
179020662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1791ffe67aa5SVáclav Hapla 
1792cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
179395fce210SBarry Smith @*/
17946497c311SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1795d71ae5a4SJacob Faibussowitsch {
179695fce210SBarry Smith   PetscFunctionBegin;
179795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
179895fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
17994f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
1800803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
18015b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
180228b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
18035b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
18049566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
18059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
180629046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
18079837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
18089566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
180995fce210SBarry Smith   }
181095fce210SBarry Smith   *degree = NULL;
18113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
181295fce210SBarry Smith }
181395fce210SBarry Smith 
181495fce210SBarry Smith /*@C
1815cab54364SBarry Smith   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
181695fce210SBarry Smith 
181795fce210SBarry Smith   Collective
181895fce210SBarry Smith 
18194165533cSJose E. Roman   Input Parameter:
182095fce210SBarry Smith . sf - star forest
182195fce210SBarry Smith 
18224165533cSJose E. Roman   Output Parameter:
182395fce210SBarry Smith . degree - degree of each root vertex
182495fce210SBarry Smith 
182595fce210SBarry Smith   Level: developer
182695fce210SBarry Smith 
1827cab54364SBarry Smith   Note:
182820662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1829ffe67aa5SVáclav Hapla 
1830cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
183195fce210SBarry Smith @*/
1832*9c9354e5SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1833d71ae5a4SJacob Faibussowitsch {
183495fce210SBarry Smith   PetscFunctionBegin;
183595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
183695fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
18374f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
183895fce210SBarry Smith   if (!sf->degreeknown) {
183928b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18409566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18419566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
184295fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
184395fce210SBarry Smith   }
184495fce210SBarry Smith   *degree = sf->degree;
18453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
184695fce210SBarry Smith }
184795fce210SBarry Smith 
1848673100f5SVaclav Hapla /*@C
184920662ed9SBarry Smith   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
185066dfcd1aSVaclav Hapla   Each multi-root is assigned index of the corresponding original root.
1851673100f5SVaclav Hapla 
1852673100f5SVaclav Hapla   Collective
1853673100f5SVaclav Hapla 
18544165533cSJose E. Roman   Input Parameters:
1855673100f5SVaclav Hapla + sf     - star forest
1856cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1857673100f5SVaclav Hapla 
18584165533cSJose E. Roman   Output Parameters:
185920662ed9SBarry Smith + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
186020662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1861673100f5SVaclav Hapla 
1862673100f5SVaclav Hapla   Level: developer
1863673100f5SVaclav Hapla 
1864cab54364SBarry Smith   Note:
186520662ed9SBarry Smith   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1866ffe67aa5SVáclav Hapla 
1867cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1868673100f5SVaclav Hapla @*/
1869d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1870d71ae5a4SJacob Faibussowitsch {
1871673100f5SVaclav Hapla   PetscSF  msf;
187263bfac88SBarry Smith   PetscInt k = 0, nroots, nmroots;
1873673100f5SVaclav Hapla 
1874673100f5SVaclav Hapla   PetscFunctionBegin;
1875673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18769566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18774f572ea9SToby Isaac   if (nroots) PetscAssertPointer(degree, 2);
18784f572ea9SToby Isaac   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18794f572ea9SToby Isaac   PetscAssertPointer(multiRootsOrigNumbering, 4);
18809566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
18819566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
188363bfac88SBarry Smith   for (PetscInt i = 0; i < nroots; i++) {
1884673100f5SVaclav Hapla     if (!degree[i]) continue;
188563bfac88SBarry Smith     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1886673100f5SVaclav Hapla   }
188708401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
188866dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
18893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1890673100f5SVaclav Hapla }
1891673100f5SVaclav Hapla 
189295fce210SBarry Smith /*@C
1893cab54364SBarry Smith   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
189495fce210SBarry Smith 
189595fce210SBarry Smith   Collective
189695fce210SBarry Smith 
18974165533cSJose E. Roman   Input Parameters:
189895fce210SBarry Smith + sf       - star forest
189995fce210SBarry Smith . unit     - data type
190095fce210SBarry Smith - leafdata - leaf data to gather to roots
190195fce210SBarry Smith 
19024165533cSJose E. Roman   Output Parameter:
190395fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
190495fce210SBarry Smith 
190595fce210SBarry Smith   Level: intermediate
190695fce210SBarry Smith 
1907cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
190895fce210SBarry Smith @*/
1909d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1910d71ae5a4SJacob Faibussowitsch {
1911a5526d50SJunchao Zhang   PetscSF multi = NULL;
191295fce210SBarry Smith 
191395fce210SBarry Smith   PetscFunctionBegin;
191495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19159566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19169566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19179566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191995fce210SBarry Smith }
192095fce210SBarry Smith 
192195fce210SBarry Smith /*@C
1922cab54364SBarry Smith   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
192395fce210SBarry Smith 
192495fce210SBarry Smith   Collective
192595fce210SBarry Smith 
19264165533cSJose E. Roman   Input Parameters:
192795fce210SBarry Smith + sf       - star forest
192895fce210SBarry Smith . unit     - data type
192995fce210SBarry Smith - leafdata - leaf data to gather to roots
193095fce210SBarry Smith 
19314165533cSJose E. Roman   Output Parameter:
193295fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
193395fce210SBarry Smith 
193495fce210SBarry Smith   Level: intermediate
193595fce210SBarry Smith 
1936cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
193795fce210SBarry Smith @*/
1938d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1939d71ae5a4SJacob Faibussowitsch {
1940a5526d50SJunchao Zhang   PetscSF multi = NULL;
194195fce210SBarry Smith 
194295fce210SBarry Smith   PetscFunctionBegin;
194395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19449566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19459566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
194795fce210SBarry Smith }
194895fce210SBarry Smith 
194995fce210SBarry Smith /*@C
1950cab54364SBarry Smith   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
195195fce210SBarry Smith 
195295fce210SBarry Smith   Collective
195395fce210SBarry Smith 
19544165533cSJose E. Roman   Input Parameters:
195595fce210SBarry Smith + sf            - star forest
195695fce210SBarry Smith . unit          - data type
195795fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
195895fce210SBarry Smith 
19594165533cSJose E. Roman   Output Parameter:
196095fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
196195fce210SBarry Smith 
196295fce210SBarry Smith   Level: intermediate
196395fce210SBarry Smith 
196420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
196595fce210SBarry Smith @*/
1966d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1967d71ae5a4SJacob Faibussowitsch {
1968a5526d50SJunchao Zhang   PetscSF multi = NULL;
196995fce210SBarry Smith 
197095fce210SBarry Smith   PetscFunctionBegin;
197195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19729566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19739566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19749566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
197695fce210SBarry Smith }
197795fce210SBarry Smith 
197895fce210SBarry Smith /*@C
1979cab54364SBarry Smith   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
198095fce210SBarry Smith 
198195fce210SBarry Smith   Collective
198295fce210SBarry Smith 
19834165533cSJose E. Roman   Input Parameters:
198495fce210SBarry Smith + sf            - star forest
198595fce210SBarry Smith . unit          - data type
198695fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
198795fce210SBarry Smith 
19884165533cSJose E. Roman   Output Parameter:
198995fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
199095fce210SBarry Smith 
199195fce210SBarry Smith   Level: intermediate
199295fce210SBarry Smith 
199320662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
199495fce210SBarry Smith @*/
1995d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1996d71ae5a4SJacob Faibussowitsch {
1997a5526d50SJunchao Zhang   PetscSF multi = NULL;
199895fce210SBarry Smith 
199995fce210SBarry Smith   PetscFunctionBegin;
200095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
20019566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
20029566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
20033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
200495fce210SBarry Smith }
2005a7b3aa13SAta Mesgarnejad 
2006d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2007d71ae5a4SJacob Faibussowitsch {
2008a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
2009a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
2010a072220fSLawrence Mitchell   PetscHSetI      seen;
2011a072220fSLawrence Mitchell 
2012a072220fSLawrence Mitchell   PetscFunctionBegin;
2013b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
20149566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
20159566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
2016a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
2017a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
20189566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
2019a072220fSLawrence Mitchell     }
20209566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
202108401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
20229566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
2023b458e8f1SJose E. Roman   }
20243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2025a072220fSLawrence Mitchell }
202654729392SStefano Zampini 
2027a7b3aa13SAta Mesgarnejad /*@
2028cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2029a7b3aa13SAta Mesgarnejad 
2030a7b3aa13SAta Mesgarnejad   Input Parameters:
2031cab54364SBarry Smith + sfA - The first `PetscSF`
2032cab54364SBarry Smith - sfB - The second `PetscSF`
2033a7b3aa13SAta Mesgarnejad 
20342fe279fdSBarry Smith   Output Parameter:
2035cab54364SBarry Smith . sfBA - The composite `PetscSF`
2036a7b3aa13SAta Mesgarnejad 
2037a7b3aa13SAta Mesgarnejad   Level: developer
2038a7b3aa13SAta Mesgarnejad 
2039a072220fSLawrence Mitchell   Notes:
2040cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
204154729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
204254729392SStefano Zampini 
204320662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
204420662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
204520662ed9SBarry Smith   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
204620662ed9SBarry Smith   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2047a072220fSLawrence Mitchell 
2048db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2049a7b3aa13SAta Mesgarnejad @*/
2050d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2051d71ae5a4SJacob Faibussowitsch {
2052a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
2053d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
205454729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
205554729392SStefano Zampini   PetscInt          *localPointsBA;
205654729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
205754729392SStefano Zampini   PetscBool          denseB;
2058a7b3aa13SAta Mesgarnejad 
2059a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
2060a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
206129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
206229046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
206329046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
206454729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
20654f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
20669566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20679566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
206854729392SStefano Zampini 
20699566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20709566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
207120662ed9SBarry Smith   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
207220662ed9SBarry Smith      numRootsB; otherwise, garbage will be broadcasted.
207320662ed9SBarry Smith      Example (comm size = 1):
207420662ed9SBarry Smith      sfA: 0 <- (0, 0)
207520662ed9SBarry Smith      sfB: 100 <- (0, 0)
207620662ed9SBarry Smith           101 <- (0, 1)
207720662ed9SBarry Smith      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
207820662ed9SBarry Smith      of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
207920662ed9SBarry Smith      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
208020662ed9SBarry Smith      remotePointsA; if not recasted, point 101 would receive a garbage value.             */
20819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
208254729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
208354729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
208454729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
208554729392SStefano Zampini   }
208654729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
20870ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20880ea77edaSksagiyam 
20890ea77edaSksagiyam     if (localp >= numRootsB) continue;
20900ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
209154729392SStefano Zampini   }
2092d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20939566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20950ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20960ea77edaSksagiyam     leafdataB[i].rank  = -1;
20970ea77edaSksagiyam     leafdataB[i].index = -1;
20980ea77edaSksagiyam   }
20996497c311SBarry Smith   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
21006497c311SBarry Smith   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
21019566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2102d41018fbSJunchao Zhang 
210354729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
210454729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
210554729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
210654729392SStefano Zampini     else numLeavesBA++;
210754729392SStefano Zampini   }
210854729392SStefano Zampini   if (denseB) {
2109d41018fbSJunchao Zhang     localPointsBA  = NULL;
2110d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2111d41018fbSJunchao Zhang   } else {
21129566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
21139566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
211454729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
211554729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
211654729392SStefano Zampini 
211754729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
211854729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
211954729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
212054729392SStefano Zampini       numLeavesBA++;
212154729392SStefano Zampini     }
21229566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2123d41018fbSJunchao Zhang   }
21249566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21259566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21269566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2128a7b3aa13SAta Mesgarnejad }
21291c6ba672SJunchao Zhang 
213004c0ada0SJunchao Zhang /*@
2131cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
213204c0ada0SJunchao Zhang 
213304c0ada0SJunchao Zhang   Input Parameters:
2134cab54364SBarry Smith + sfA - The first `PetscSF`
2135cab54364SBarry Smith - sfB - The second `PetscSF`
213604c0ada0SJunchao Zhang 
21372fe279fdSBarry Smith   Output Parameter:
2138cab54364SBarry Smith . sfBA - The composite `PetscSF`.
213904c0ada0SJunchao Zhang 
214004c0ada0SJunchao Zhang   Level: developer
214104c0ada0SJunchao Zhang 
214254729392SStefano Zampini   Notes:
214320662ed9SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
214454729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
214520662ed9SBarry Smith   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
214654729392SStefano Zampini 
214720662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
214820662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
214920662ed9SBarry Smith   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
215020662ed9SBarry Smith   on `sfA`, then
215120662ed9SBarry Smith   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
215254729392SStefano Zampini 
2153db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
215404c0ada0SJunchao Zhang @*/
2155d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2156d71ae5a4SJacob Faibussowitsch {
215704c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
215804c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
215904c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
216054729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
216154729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21625b0d146aSStefano Zampini   MPI_Op             op;
21635b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21645b0d146aSStefano Zampini   PetscBool iswin;
21655b0d146aSStefano Zampini #endif
216604c0ada0SJunchao Zhang 
216704c0ada0SJunchao Zhang   PetscFunctionBegin;
216804c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
216904c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
217004c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
217104c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
217254729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
21734f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
21749566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21759566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
217654729392SStefano Zampini 
21779566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21789566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21795b0d146aSStefano Zampini 
21805b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
21815b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
218283df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21835b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
21845b0d146aSStefano Zampini      the root condition is not meet.
21855b0d146aSStefano Zampini    */
21865b0d146aSStefano Zampini   op = MPI_MAXLOC;
21875b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21885b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21899566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
219083df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21915b0d146aSStefano Zampini #endif
21925b0d146aSStefano Zampini 
21939566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
219554729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
219654729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
219754729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
219854729392SStefano Zampini   }
219954729392SStefano Zampini   if (localPointsA) {
220054729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
220154729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
220254729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
220354729392SStefano Zampini     }
220454729392SStefano Zampini   } else {
220554729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
220654729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
220754729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
220854729392SStefano Zampini     }
220954729392SStefano Zampini   }
221054729392SStefano Zampini 
22119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
22129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
221354729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
221454729392SStefano Zampini     remotePointsBA[i].rank  = -1;
221554729392SStefano Zampini     remotePointsBA[i].index = -1;
221654729392SStefano Zampini   }
221754729392SStefano Zampini 
22186497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22196497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22209566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
222154729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
222254729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
222354729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
222454729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
222554729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
222654729392SStefano Zampini     numLeavesBA++;
222754729392SStefano Zampini   }
22289566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22299566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
22309566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
223204c0ada0SJunchao Zhang }
223304c0ada0SJunchao Zhang 
22341c6ba672SJunchao Zhang /*
2235cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22361c6ba672SJunchao Zhang 
22372fe279fdSBarry Smith   Input Parameter:
2238cab54364SBarry Smith . sf - The global `PetscSF`
22391c6ba672SJunchao Zhang 
22402fe279fdSBarry Smith   Output Parameter:
2241cab54364SBarry Smith . out - The local `PetscSF`
2242cab54364SBarry Smith 
2243cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`
22441c6ba672SJunchao Zhang  */
2245d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2246d71ae5a4SJacob Faibussowitsch {
22471c6ba672SJunchao Zhang   MPI_Comm           comm;
22481c6ba672SJunchao Zhang   PetscMPIInt        myrank;
22491c6ba672SJunchao Zhang   const PetscInt    *ilocal;
22501c6ba672SJunchao Zhang   const PetscSFNode *iremote;
22511c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
22521c6ba672SJunchao Zhang   PetscSFNode       *liremote;
22531c6ba672SJunchao Zhang   PetscSF            lsf;
22541c6ba672SJunchao Zhang 
22551c6ba672SJunchao Zhang   PetscFunctionBegin;
22561c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2257dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2258dbbe0bcdSBarry Smith   else {
2259835f2295SStefano Zampini     PetscMPIInt irank;
2260835f2295SStefano Zampini 
22611c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22629566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22639566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22641c6ba672SJunchao Zhang 
22651c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
22669566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22679371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
2268835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2269835f2295SStefano Zampini       if (irank == myrank) lnleaves++;
22709371c9d4SSatish Balay     }
22719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
22729566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
22731c6ba672SJunchao Zhang 
22741c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
2275835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2276835f2295SStefano Zampini       if (irank == myrank) {
22771c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22781c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
22791c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
22801c6ba672SJunchao Zhang         j++;
22811c6ba672SJunchao Zhang       }
22821c6ba672SJunchao Zhang     }
22839566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22849566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
22859566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22869566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
22871c6ba672SJunchao Zhang     *out = lsf;
22881c6ba672SJunchao Zhang   }
22893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22901c6ba672SJunchao Zhang }
2291dd5b3ca6SJunchao Zhang 
2292dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2293d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2294d71ae5a4SJacob Faibussowitsch {
2295eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2296dd5b3ca6SJunchao Zhang 
2297dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2298dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22999566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
23009566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
23019566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
23029566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2303dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
23049566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
23053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2306dd5b3ca6SJunchao Zhang }
2307dd5b3ca6SJunchao Zhang 
2308157edd7aSVaclav Hapla /*@
2309cab54364SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2310157edd7aSVaclav Hapla 
2311157edd7aSVaclav Hapla   Input Parameters:
2312157edd7aSVaclav Hapla + comm        - the communicator
2313cab54364SBarry Smith . nsfs        - the number of input `PetscSF`
2314cab54364SBarry Smith . sfs         - the array of input `PetscSF`
23151f40158dSVaclav Hapla . rootMode    - the root mode specifying how roots are handled
231620662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2317157edd7aSVaclav Hapla 
23182fe279fdSBarry Smith   Output Parameter:
2319cab54364SBarry Smith . newsf - The resulting `PetscSF`
2320157edd7aSVaclav Hapla 
23211f40158dSVaclav Hapla   Level: advanced
2322157edd7aSVaclav Hapla 
2323157edd7aSVaclav Hapla   Notes:
232420662ed9SBarry Smith   The communicator of all `PetscSF`s in `sfs` must be comm.
2325157edd7aSVaclav Hapla 
232620662ed9SBarry Smith   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
232720662ed9SBarry Smith 
232820662ed9SBarry Smith   The offsets in `leafOffsets` are added to the original leaf indices.
232920662ed9SBarry Smith 
233020662ed9SBarry Smith   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
233120662ed9SBarry Smith   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
233220662ed9SBarry Smith 
233320662ed9SBarry Smith   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2334157edd7aSVaclav Hapla   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2335157edd7aSVaclav Hapla 
233620662ed9SBarry Smith   All root modes retain the essential connectivity condition.
233720662ed9SBarry Smith   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
233820662ed9SBarry Smith   Parameter `rootMode` controls how the input root spaces are combined.
233920662ed9SBarry Smith   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
234020662ed9SBarry Smith   and is also the same in the output `PetscSF`.
23411f40158dSVaclav Hapla   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23421f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
234320662ed9SBarry Smith   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23441f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23451593df67SStefano Zampini   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
23461f40158dSVaclav Hapla   the original root ranks are ignored.
23471f40158dSVaclav Hapla   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
234820662ed9SBarry Smith   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
234920662ed9SBarry Smith   to keep the load balancing.
235020662ed9SBarry Smith   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
23511f40158dSVaclav Hapla 
23521f40158dSVaclav Hapla   Example:
23531f40158dSVaclav Hapla   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
235420662ed9SBarry Smith .vb
235520662ed9SBarry Smith   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
235620662ed9SBarry Smith   for m in {local,global,shared}; do
235720662ed9SBarry Smith     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
235820662ed9SBarry Smith   done
235920662ed9SBarry Smith .ve
236020662ed9SBarry Smith   we generate two identical `PetscSF`s sf_0 and sf_1,
236120662ed9SBarry Smith .vb
236220662ed9SBarry Smith   PetscSF Object: sf_0 2 MPI processes
236320662ed9SBarry Smith     type: basic
236420662ed9SBarry Smith     rank #leaves #roots
236520662ed9SBarry Smith     [ 0]       4      2
236620662ed9SBarry Smith     [ 1]       4      2
236720662ed9SBarry Smith     leaves      roots       roots in global numbering
236820662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
236920662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
237020662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
237120662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
237220662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
237320662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
237420662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
237520662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
237620662ed9SBarry Smith .ve
2377e33f79d8SJacob Faibussowitsch   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
237820662ed9SBarry Smith .vb
237920662ed9SBarry Smith   rootMode = local:
238020662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
238120662ed9SBarry Smith     type: basic
238220662ed9SBarry Smith     rank #leaves #roots
238320662ed9SBarry Smith     [ 0]       8      4
238420662ed9SBarry Smith     [ 1]       8      4
238520662ed9SBarry Smith     leaves      roots       roots in global numbering
238620662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
238720662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
238820662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   4
238920662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   5
239020662ed9SBarry Smith     ( 0,  4) <- ( 0,  2)  =   2
239120662ed9SBarry Smith     ( 0,  5) <- ( 0,  3)  =   3
239220662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
239320662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
239420662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
239520662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
239620662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   4
239720662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   5
239820662ed9SBarry Smith     ( 1,  4) <- ( 0,  2)  =   2
239920662ed9SBarry Smith     ( 1,  5) <- ( 0,  3)  =   3
240020662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
240120662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
240220662ed9SBarry Smith 
240320662ed9SBarry Smith   rootMode = global:
240420662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
240520662ed9SBarry Smith     type: basic
240620662ed9SBarry Smith     rank #leaves #roots
240720662ed9SBarry Smith     [ 0]       8      4
240820662ed9SBarry Smith     [ 1]       8      4
240920662ed9SBarry Smith     leaves      roots       roots in global numbering
241020662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
241120662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
241220662ed9SBarry Smith     ( 0,  2) <- ( 0,  2)  =   2
241320662ed9SBarry Smith     ( 0,  3) <- ( 0,  3)  =   3
241420662ed9SBarry Smith     ( 0,  4) <- ( 1,  0)  =   4
241520662ed9SBarry Smith     ( 0,  5) <- ( 1,  1)  =   5
241620662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
241720662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
241820662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
241920662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
242020662ed9SBarry Smith     ( 1,  2) <- ( 0,  2)  =   2
242120662ed9SBarry Smith     ( 1,  3) <- ( 0,  3)  =   3
242220662ed9SBarry Smith     ( 1,  4) <- ( 1,  0)  =   4
242320662ed9SBarry Smith     ( 1,  5) <- ( 1,  1)  =   5
242420662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
242520662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
242620662ed9SBarry Smith 
242720662ed9SBarry Smith   rootMode = shared:
242820662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
242920662ed9SBarry Smith     type: basic
243020662ed9SBarry Smith     rank #leaves #roots
243120662ed9SBarry Smith     [ 0]       8      2
243220662ed9SBarry Smith     [ 1]       8      2
243320662ed9SBarry Smith     leaves      roots       roots in global numbering
243420662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
243520662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
243620662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
243720662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
243820662ed9SBarry Smith     ( 0,  4) <- ( 0,  0)  =   0
243920662ed9SBarry Smith     ( 0,  5) <- ( 0,  1)  =   1
244020662ed9SBarry Smith     ( 0,  6) <- ( 1,  0)  =   2
244120662ed9SBarry Smith     ( 0,  7) <- ( 1,  1)  =   3
244220662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
244320662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
244420662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
244520662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
244620662ed9SBarry Smith     ( 1,  4) <- ( 0,  0)  =   0
244720662ed9SBarry Smith     ( 1,  5) <- ( 0,  1)  =   1
244820662ed9SBarry Smith     ( 1,  6) <- ( 1,  0)  =   2
244920662ed9SBarry Smith     ( 1,  7) <- ( 1,  1)  =   3
245020662ed9SBarry Smith .ve
24511f40158dSVaclav Hapla 
24521f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2453157edd7aSVaclav Hapla @*/
24541f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2455d71ae5a4SJacob Faibussowitsch {
2456157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2457157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2458157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2459157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2460157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
24611f40158dSVaclav Hapla   PetscLayout  glayout         = NULL;
24621f40158dSVaclav Hapla   PetscInt    *gremote         = NULL;
24631f40158dSVaclav Hapla   PetscMPIInt  rank, size;
2464157edd7aSVaclav Hapla 
2465157edd7aSVaclav Hapla   PetscFunctionBegin;
246612f479c1SVaclav Hapla   if (PetscDefined(USE_DEBUG)) {
2467157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2468157edd7aSVaclav Hapla 
24699566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2470157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24714f572ea9SToby Isaac     PetscAssertPointer(sfs, 3);
2472157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2473157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2474157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2475157edd7aSVaclav Hapla     }
24761f40158dSVaclav Hapla     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24774f572ea9SToby Isaac     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24784f572ea9SToby Isaac     PetscAssertPointer(newsf, 6);
24799566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2480157edd7aSVaclav Hapla   }
2481157edd7aSVaclav Hapla   if (!nsfs) {
24829566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
24839566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24843ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2485157edd7aSVaclav Hapla   }
24869566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
24871f40158dSVaclav Hapla   PetscCallMPI(MPI_Comm_size(comm, &size));
2488157edd7aSVaclav Hapla 
24891f40158dSVaclav Hapla   /* Calculate leaf array offsets */
24909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2491157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2492157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2493157edd7aSVaclav Hapla     PetscInt nl;
2494157edd7aSVaclav Hapla 
24959566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2496157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2497157edd7aSVaclav Hapla   }
2498157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2499157edd7aSVaclav Hapla 
25001f40158dSVaclav Hapla   /* Calculate number of roots */
25011f40158dSVaclav Hapla   switch (rootMode) {
25021f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
25031f40158dSVaclav Hapla     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
25041f40158dSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
25051f40158dSVaclav Hapla       for (s = 1; s < nsfs; s++) {
25061f40158dSVaclav Hapla         PetscInt nr;
25071f40158dSVaclav Hapla 
25081f40158dSVaclav Hapla         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
25091f40158dSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
25101f40158dSVaclav Hapla       }
25111f40158dSVaclav Hapla     }
25121f40158dSVaclav Hapla   } break;
25131f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
25141f40158dSVaclav Hapla     /* Calculate also global layout in this case */
25151f40158dSVaclav Hapla     PetscInt    *nls;
25161f40158dSVaclav Hapla     PetscLayout *lts;
25171f40158dSVaclav Hapla     PetscInt   **inds;
25181f40158dSVaclav Hapla     PetscInt     j;
25191f40158dSVaclav Hapla     PetscInt     rootOffset = 0;
25201f40158dSVaclav Hapla 
25211f40158dSVaclav Hapla     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
25221f40158dSVaclav Hapla     PetscCall(PetscLayoutCreate(comm, &glayout));
25231f40158dSVaclav Hapla     glayout->bs = 1;
25241f40158dSVaclav Hapla     glayout->n  = 0;
25251f40158dSVaclav Hapla     glayout->N  = 0;
25261f40158dSVaclav Hapla     for (s = 0; s < nsfs; s++) {
25271f40158dSVaclav Hapla       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
25281f40158dSVaclav Hapla       glayout->n += lts[s]->n;
25291f40158dSVaclav Hapla       glayout->N += lts[s]->N;
25301f40158dSVaclav Hapla     }
25311f40158dSVaclav Hapla     PetscCall(PetscLayoutSetUp(glayout));
25321f40158dSVaclav Hapla     PetscCall(PetscMalloc1(nLeaves, &gremote));
25331f40158dSVaclav Hapla     for (s = 0, j = 0; s < nsfs; s++) {
25341f40158dSVaclav Hapla       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25351f40158dSVaclav Hapla       rootOffset += lts[s]->N;
25361f40158dSVaclav Hapla       PetscCall(PetscLayoutDestroy(&lts[s]));
25371f40158dSVaclav Hapla       PetscCall(PetscFree(inds[s]));
25381f40158dSVaclav Hapla     }
25391f40158dSVaclav Hapla     PetscCall(PetscFree3(lts, nls, inds));
25401f40158dSVaclav Hapla     nRoots = glayout->N;
25411f40158dSVaclav Hapla   } break;
25421f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25431f40158dSVaclav Hapla     /* nRoots calculated later in this case */
25441f40158dSVaclav Hapla     break;
25451f40158dSVaclav Hapla   default:
25461f40158dSVaclav Hapla     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25471f40158dSVaclav Hapla   }
25481f40158dSVaclav Hapla 
2549157edd7aSVaclav Hapla   if (!leafOffsets) {
2550157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2551157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2552157edd7aSVaclav Hapla       const PetscInt *ilocal;
2553157edd7aSVaclav Hapla 
25549566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2555157edd7aSVaclav Hapla       if (ilocal) {
2556157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2557157edd7aSVaclav Hapla         break;
2558157edd7aSVaclav Hapla       }
2559157edd7aSVaclav Hapla     }
2560157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2561157edd7aSVaclav Hapla   }
2562157edd7aSVaclav Hapla 
2563157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2564157edd7aSVaclav Hapla   ilocal_new = NULL;
2565157edd7aSVaclav Hapla   if (!all_ilocal_null) {
25669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2567157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2568157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2569157edd7aSVaclav Hapla       const PetscInt *ilocal;
25708e3a54c0SPierre Jolivet       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2571157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2572157edd7aSVaclav Hapla 
25739566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2574157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2575157edd7aSVaclav Hapla     }
2576157edd7aSVaclav Hapla   }
2577157edd7aSVaclav Hapla 
2578157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
25791f40158dSVaclav Hapla   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25801f40158dSVaclav Hapla     PetscInt rootOffset = 0;
25811f40158dSVaclav Hapla 
25829566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2583157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) {
2584157edd7aSVaclav Hapla       iremote_new[i].rank  = -1;
2585157edd7aSVaclav Hapla       iremote_new[i].index = -1;
2586157edd7aSVaclav Hapla     }
2587157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2588157edd7aSVaclav Hapla       PetscInt           i, nl, nr;
2589157edd7aSVaclav Hapla       PetscSF            tmp_sf;
2590157edd7aSVaclav Hapla       const PetscSFNode *iremote;
2591157edd7aSVaclav Hapla       PetscSFNode       *tmp_rootdata;
25928e3a54c0SPierre Jolivet       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2593157edd7aSVaclav Hapla 
25949566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25959566063dSJacob Faibussowitsch       PetscCall(PetscSFCreate(comm, &tmp_sf));
2596157edd7aSVaclav Hapla       /* create helper SF with contiguous leaves */
25979566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25989566063dSJacob Faibussowitsch       PetscCall(PetscSFSetUp(tmp_sf));
25999566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
26001f40158dSVaclav Hapla       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2601157edd7aSVaclav Hapla         for (i = 0; i < nr; i++) {
26021f40158dSVaclav Hapla           tmp_rootdata[i].index = i + rootOffset;
26036497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
2604157edd7aSVaclav Hapla         }
26051f40158dSVaclav Hapla         rootOffset += nr;
26061f40158dSVaclav Hapla       } else {
26071f40158dSVaclav Hapla         for (i = 0; i < nr; i++) {
26081f40158dSVaclav Hapla           tmp_rootdata[i].index = i;
26096497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
26101f40158dSVaclav Hapla         }
26111f40158dSVaclav Hapla       }
26126497c311SBarry Smith       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26136497c311SBarry Smith       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26149566063dSJacob Faibussowitsch       PetscCall(PetscSFDestroy(&tmp_sf));
26159566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp_rootdata));
2616157edd7aSVaclav Hapla     }
2617aa624791SPierre Jolivet     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2618157edd7aSVaclav Hapla 
2619157edd7aSVaclav Hapla     /* Build the new SF */
26209566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
26219566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
26221f40158dSVaclav Hapla   } else {
26231f40158dSVaclav Hapla     /* Build the new SF */
26241f40158dSVaclav Hapla     PetscCall(PetscSFCreate(comm, newsf));
26251f40158dSVaclav Hapla     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
26261f40158dSVaclav Hapla   }
26279566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
26281f40158dSVaclav Hapla   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26291f40158dSVaclav Hapla   PetscCall(PetscLayoutDestroy(&glayout));
26301f40158dSVaclav Hapla   PetscCall(PetscFree(gremote));
26319566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
26323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2633157edd7aSVaclav Hapla }
26348e54d7e8SToby Isaac 
26358e54d7e8SToby Isaac /*@
26368e54d7e8SToby Isaac   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
26378e54d7e8SToby Isaac 
26388e54d7e8SToby Isaac   Collective
26398e54d7e8SToby Isaac 
26408e54d7e8SToby Isaac   Input Parameters:
26418e54d7e8SToby Isaac + sf       - star forest
26428e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
2643d7c1f440SPierre Jolivet . rootdata - root data that will be used for multiple PetscSF communications
2644d7c1f440SPierre Jolivet - leafdata - leaf data that will be used for multiple PetscSF communications
26458e54d7e8SToby Isaac 
26468e54d7e8SToby Isaac   Level: advanced
26478e54d7e8SToby Isaac 
26488e54d7e8SToby Isaac   Notes:
26498e54d7e8SToby Isaac   Implementations of `PetscSF` can make optimizations
26508e54d7e8SToby Isaac   for repeated communication using the same memory regions, but these optimizations
26518e54d7e8SToby Isaac   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
26528e54d7e8SToby Isaac   The intended pattern is
26538e54d7e8SToby Isaac 
26548e54d7e8SToby Isaac .vb
26558e54d7e8SToby Isaac   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
26568e54d7e8SToby Isaac 
26578e54d7e8SToby Isaac   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
26588e54d7e8SToby Isaac   // repeated use of rootdata and leafdata will now be optimized
26598e54d7e8SToby Isaac 
26608e54d7e8SToby Isaac   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
26618e54d7e8SToby Isaac   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
26628e54d7e8SToby Isaac   // ...
26638e54d7e8SToby Isaac   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
26648e54d7e8SToby Isaac   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
26658e54d7e8SToby Isaac   // ... (other communications)
26668e54d7e8SToby Isaac 
26678e54d7e8SToby Isaac   // rootdata and leafdata must be deregistered before freeing
26688e54d7e8SToby Isaac   // skipping this can lead to undefined behavior including
26698e54d7e8SToby Isaac   // deadlocks
26708e54d7e8SToby Isaac   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
26718e54d7e8SToby Isaac 
26728e54d7e8SToby Isaac   // it is now safe to free rootdata and leafdata
26738e54d7e8SToby Isaac   PetscFree2(rootdata, leafdata);
26748e54d7e8SToby Isaac .ve
26758e54d7e8SToby Isaac 
26768e54d7e8SToby Isaac   If you do not register `rootdata` and `leafdata` it will not cause an error,
26778e54d7e8SToby Isaac   but optimizations that reduce the setup time for each communication cannot be
26788e54d7e8SToby Isaac   made.  Currently, the only implementation of `PetscSF` that benefits from
26798e54d7e8SToby Isaac   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
26808e54d7e8SToby Isaac   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
26818e54d7e8SToby Isaac 
26828e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
26838e54d7e8SToby Isaac @*/
26848e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26858e54d7e8SToby Isaac {
26868e54d7e8SToby Isaac   PetscFunctionBegin;
26878e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26888e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26898e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
26908e54d7e8SToby Isaac }
26918e54d7e8SToby Isaac 
26928e54d7e8SToby Isaac /*@
26938e54d7e8SToby Isaac   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
26948e54d7e8SToby Isaac 
26958e54d7e8SToby Isaac   Collective
26968e54d7e8SToby Isaac 
26978e54d7e8SToby Isaac   Input Parameters:
26988e54d7e8SToby Isaac + sf       - star forest
26998e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
27008e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
27018e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
27028e54d7e8SToby Isaac 
27038e54d7e8SToby Isaac   Level: advanced
27048e54d7e8SToby Isaac 
27058e54d7e8SToby Isaac   Note:
27068e54d7e8SToby Isaac   See `PetscSFRegisterPersistent()` for when/how to use this function.
27078e54d7e8SToby Isaac 
27088e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
27098e54d7e8SToby Isaac @*/
27108e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
27118e54d7e8SToby Isaac {
27128e54d7e8SToby Isaac   PetscFunctionBegin;
27138e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
27148e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
27158e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
27168e54d7e8SToby Isaac }
2717e1187f0dSToby Isaac 
2718e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2719e1187f0dSToby Isaac {
2720e1187f0dSToby Isaac   MPI_Aint lb, lb_true, bytes, bytes_true;
2721e1187f0dSToby Isaac 
2722e1187f0dSToby Isaac   PetscFunctionBegin;
2723e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2724e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2725e1187f0dSToby Isaac   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2726e1187f0dSToby Isaac   *size = bytes;
2727e1187f0dSToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
2728e1187f0dSToby Isaac }
2729