xref: /petsc/src/vec/is/sf/interface/sf.c (revision 1690c2ae071c7584458d4e437df7b47bc4686b3c)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
77fd2d3dbSJunchao Zhang   #include <cuda_runtime.h>
8715b587bSJunchao Zhang   #include <petscdevice_cuda.h>
97fd2d3dbSJunchao Zhang #endif
107fd2d3dbSJunchao Zhang 
117fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
127fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
137fd2d3dbSJunchao Zhang #endif
147fd2d3dbSJunchao Zhang 
152abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER)
164bf303faSJacob Faibussowitsch extern void PetscSFCheckGraphSet(PetscSF, int);
172abc8c78SJacob Faibussowitsch #else
1895fce210SBarry Smith   #if defined(PETSC_USE_DEBUG)
19a8f51744SPierre Jolivet     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
2095fce210SBarry Smith   #else
219371c9d4SSatish Balay     #define PetscSFCheckGraphSet(sf, arg) \
229371c9d4SSatish Balay       do { \
239371c9d4SSatish Balay       } while (0)
2495fce210SBarry Smith   #endif
252abc8c78SJacob Faibussowitsch #endif
2695fce210SBarry Smith 
274c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
281f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
2995fce210SBarry Smith 
308af6ec1cSBarry Smith /*@
3195fce210SBarry Smith   PetscSFCreate - create a star forest communication context
3295fce210SBarry Smith 
33d083f849SBarry Smith   Collective
3495fce210SBarry Smith 
354165533cSJose E. Roman   Input Parameter:
3695fce210SBarry Smith . comm - communicator on which the star forest will operate
3795fce210SBarry Smith 
384165533cSJose E. Roman   Output Parameter:
3995fce210SBarry Smith . sf - new star forest context
4095fce210SBarry Smith 
4120662ed9SBarry Smith   Options Database Key:
426677b1c1SJunchao Zhang + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
436677b1c1SJunchao Zhang . -sf_type window                - Use MPI-3 one-sided window for communication
446677b1c1SJunchao Zhang . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
456677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46dd5b3ca6SJunchao Zhang 
4795fce210SBarry Smith   Level: intermediate
4895fce210SBarry Smith 
49cab54364SBarry Smith   Note:
50cab54364SBarry Smith   When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51cab54364SBarry Smith   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
5220662ed9SBarry Smith   `SF`s are optimized and they have better performance than the general `SF`s.
53dd5b3ca6SJunchao Zhang 
5438b5cf2dSJacob Faibussowitsch .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5595fce210SBarry Smith @*/
56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57d71ae5a4SJacob Faibussowitsch {
5895fce210SBarry Smith   PetscSF b;
5995fce210SBarry Smith 
6095fce210SBarry Smith   PetscFunctionBegin;
614f572ea9SToby Isaac   PetscAssertPointer(sf, 2);
629566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6395fce210SBarry Smith 
649566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6595fce210SBarry Smith   b->nroots    = -1;
6695fce210SBarry Smith   b->nleaves   = -1;
67*1690c2aeSBarry Smith   b->minleaf   = PETSC_INT_MAX;
68*1690c2aeSBarry Smith   b->maxleaf   = PETSC_INT_MIN;
6995fce210SBarry Smith   b->nranks    = -1;
7095fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7195fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7295fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7395fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7520c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7620c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7771438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7827f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
7920c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
8027f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8127f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8259af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8359af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8420c24465SJunchao Zhang   #endif
8571438e86SJunchao Zhang 
8671438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8771438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8871438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
899566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
909566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9171438e86SJunchao Zhang   #endif
9220c24465SJunchao Zhang #endif
9360c22052SBarry Smith   b->vscat.from_n = -1;
9460c22052SBarry Smith   b->vscat.to_n   = -1;
9560c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9695fce210SBarry Smith   *sf             = b;
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9895fce210SBarry Smith }
9995fce210SBarry Smith 
10029046d53SLisandro Dalcin /*@
10195fce210SBarry Smith   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10295fce210SBarry Smith 
10395fce210SBarry Smith   Collective
10495fce210SBarry Smith 
1054165533cSJose E. Roman   Input Parameter:
10695fce210SBarry Smith . sf - star forest
10795fce210SBarry Smith 
10895fce210SBarry Smith   Level: advanced
10995fce210SBarry Smith 
110cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11195fce210SBarry Smith @*/
112d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
113d71ae5a4SJacob Faibussowitsch {
11495fce210SBarry Smith   PetscFunctionBegin;
11595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
116dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
1170dd791a8SStefano Zampini   PetscCall(PetscSFDestroy(&sf->rankssf));
1180dd791a8SStefano Zampini 
11929046d53SLisandro Dalcin   sf->nroots   = -1;
12029046d53SLisandro Dalcin   sf->nleaves  = -1;
121*1690c2aeSBarry Smith   sf->minleaf  = PETSC_INT_MAX;
122*1690c2aeSBarry Smith   sf->maxleaf  = PETSC_INT_MIN;
12395fce210SBarry Smith   sf->mine     = NULL;
12495fce210SBarry Smith   sf->remote   = NULL;
12529046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1279566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12821c688dcSJed Brown   sf->nranks = -1;
1299566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
13029046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1319566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1329566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1339566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1340dd791a8SStefano Zampini 
135013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1369566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1370dd791a8SStefano Zampini 
1389566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
13971438e86SJunchao Zhang 
14071438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1419566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14271438e86SJunchao Zhang #endif
14371438e86SJunchao Zhang 
14495fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
1453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14695fce210SBarry Smith }
14795fce210SBarry Smith 
148cc4c1da9SBarry Smith /*@
149cab54364SBarry Smith   PetscSFSetType - Set the `PetscSF` communication implementation
15095fce210SBarry Smith 
151c3339decSBarry Smith   Collective
15295fce210SBarry Smith 
15395fce210SBarry Smith   Input Parameters:
154cab54364SBarry Smith + sf   - the `PetscSF` context
15595fce210SBarry Smith - type - a known method
156cab54364SBarry Smith .vb
157cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
158cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159cab54364SBarry Smith .ve
16095fce210SBarry Smith 
16195fce210SBarry Smith   Options Database Key:
16220662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
163cab54364SBarry Smith 
164cab54364SBarry Smith   Level: intermediate
16595fce210SBarry Smith 
16695fce210SBarry Smith   Notes:
16720662ed9SBarry Smith   See `PetscSFType` for possible values
16895fce210SBarry Smith 
16920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
17095fce210SBarry Smith @*/
171d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172d71ae5a4SJacob Faibussowitsch {
17395fce210SBarry Smith   PetscBool match;
1745f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17595fce210SBarry Smith 
17695fce210SBarry Smith   PetscFunctionBegin;
17795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1784f572ea9SToby Isaac   PetscAssertPointer(type, 2);
17995fce210SBarry Smith 
1809566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1813ba16761SJacob Faibussowitsch   if (match) PetscFunctionReturn(PETSC_SUCCESS);
18295fce210SBarry Smith 
1839566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1846adde796SStefano Zampini   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18529046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
186dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1879566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1889566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1899566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19195fce210SBarry Smith }
19295fce210SBarry Smith 
193cc4c1da9SBarry Smith /*@
194cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19529046d53SLisandro Dalcin 
19629046d53SLisandro Dalcin   Not Collective
19729046d53SLisandro Dalcin 
19829046d53SLisandro Dalcin   Input Parameter:
199cab54364SBarry Smith . sf - the `PetscSF` context
20029046d53SLisandro Dalcin 
20129046d53SLisandro Dalcin   Output Parameter:
202cab54364SBarry Smith . type - the `PetscSF` type name
20329046d53SLisandro Dalcin 
20429046d53SLisandro Dalcin   Level: intermediate
20529046d53SLisandro Dalcin 
20620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20729046d53SLisandro Dalcin @*/
208d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209d71ae5a4SJacob Faibussowitsch {
21029046d53SLisandro Dalcin   PetscFunctionBegin;
21129046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2124f572ea9SToby Isaac   PetscAssertPointer(type, 2);
21329046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21529046d53SLisandro Dalcin }
21629046d53SLisandro Dalcin 
2170764c050SBarry Smith /*@
21820662ed9SBarry Smith   PetscSFDestroy - destroy a star forest
21995fce210SBarry Smith 
22095fce210SBarry Smith   Collective
22195fce210SBarry Smith 
2224165533cSJose E. Roman   Input Parameter:
22395fce210SBarry Smith . sf - address of star forest
22495fce210SBarry Smith 
22595fce210SBarry Smith   Level: intermediate
22695fce210SBarry Smith 
22720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22895fce210SBarry Smith @*/
229d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
230d71ae5a4SJacob Faibussowitsch {
23195fce210SBarry Smith   PetscFunctionBegin;
2323ba16761SJacob Faibussowitsch   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233f4f49eeaSPierre Jolivet   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
234f4f49eeaSPierre Jolivet   if (--((PetscObject)*sf)->refct > 0) {
2359371c9d4SSatish Balay     *sf = NULL;
2363ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2379371c9d4SSatish Balay   }
2389566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
239f4f49eeaSPierre Jolivet   PetscTryTypeMethod(*sf, Destroy);
2409566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2419566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243715b587bSJunchao Zhang   if ((*sf)->use_stream_aware_mpi) {
244715b587bSJunchao Zhang     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245715b587bSJunchao Zhang     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246715b587bSJunchao Zhang   }
247715b587bSJunchao Zhang #endif
2489566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
2493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25095fce210SBarry Smith }
25195fce210SBarry Smith 
252d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253d71ae5a4SJacob Faibussowitsch {
254c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
255c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
256c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
257c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
258c4e6a40aSLawrence Mitchell 
259c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
2603ba16761SJacob Faibussowitsch   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2619566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
264c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
265c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
266c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26808401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26908401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270c4e6a40aSLawrence Mitchell   }
2713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
272c4e6a40aSLawrence Mitchell }
273c4e6a40aSLawrence Mitchell 
27495fce210SBarry Smith /*@
27520662ed9SBarry Smith   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27695fce210SBarry Smith 
27795fce210SBarry Smith   Collective
27895fce210SBarry Smith 
2794165533cSJose E. Roman   Input Parameter:
28095fce210SBarry Smith . sf - star forest communication object
28195fce210SBarry Smith 
28295fce210SBarry Smith   Level: beginner
28395fce210SBarry Smith 
28420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28595fce210SBarry Smith @*/
286d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
287d71ae5a4SJacob Faibussowitsch {
28895fce210SBarry Smith   PetscFunctionBegin;
28929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
29029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
2913ba16761SJacob Faibussowitsch   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2929566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2939566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2949566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
29620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29720c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
29871438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
29971438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
30020c24465SJunchao Zhang   }
30120c24465SJunchao Zhang #endif
30259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30359af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
30459af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
30559af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
30659af0bd3SScott Kruger   }
30759af0bd3SScott Kruger #endif
30820c24465SJunchao Zhang 
30920c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
31020c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31120c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31220c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31320c24465SJunchao Zhang   }
31420c24465SJunchao Zhang #endif
3159566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31695fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
3173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31895fce210SBarry Smith }
31995fce210SBarry Smith 
3208af6ec1cSBarry Smith /*@
321cab54364SBarry Smith   PetscSFSetFromOptions - set `PetscSF` options using the options database
32295fce210SBarry Smith 
32395fce210SBarry Smith   Logically Collective
32495fce210SBarry Smith 
3254165533cSJose E. Roman   Input Parameter:
32695fce210SBarry Smith . sf - star forest
32795fce210SBarry Smith 
32895fce210SBarry Smith   Options Database Keys:
32920662ed9SBarry Smith + -sf_type                      - implementation type, see `PetscSFSetType()`
33051ccb202SJunchao Zhang . -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
33120662ed9SBarry Smith . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
33220662ed9SBarry Smith                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
33320662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33420662ed9SBarry Smith . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
33520662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33695fce210SBarry Smith 
3376497c311SBarry Smith - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
33859af0bd3SScott Kruger                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
33920c24465SJunchao Zhang                                   the only available is kokkos.
34020c24465SJunchao Zhang 
34195fce210SBarry Smith   Level: intermediate
342cab54364SBarry Smith 
343cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34495fce210SBarry Smith @*/
345d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346d71ae5a4SJacob Faibussowitsch {
34795fce210SBarry Smith   PetscSFType deft;
34895fce210SBarry Smith   char        type[256];
34995fce210SBarry Smith   PetscBool   flg;
35095fce210SBarry Smith 
35195fce210SBarry Smith   PetscFunctionBegin;
35295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
353d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
35495fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3559566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3569566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3579566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
3587fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
35920c24465SJunchao Zhang   {
36020c24465SJunchao Zhang     char      backendstr[32] = {0};
36159af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36220c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
363d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3649566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3669566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3679566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3689566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
36959af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37020c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37120c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37259af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37328b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37420c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37508401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37620c24465SJunchao Zhang   #endif
377715b587bSJunchao Zhang 
378715b587bSJunchao Zhang   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379715b587bSJunchao Zhang     if (sf->use_stream_aware_mpi) {
380715b587bSJunchao Zhang       MPI_Info info;
381715b587bSJunchao Zhang 
382715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_create(&info));
383715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384715b587bSJunchao Zhang       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
387715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388715b587bSJunchao Zhang     }
389715b587bSJunchao Zhang   #endif
39020c24465SJunchao Zhang   }
391c2a741eeSJunchao Zhang #endif
392dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393d0609cedSBarry Smith   PetscOptionsEnd();
3943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39595fce210SBarry Smith }
39695fce210SBarry Smith 
39729046d53SLisandro Dalcin /*@
39895fce210SBarry Smith   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
39995fce210SBarry Smith 
40095fce210SBarry Smith   Logically Collective
40195fce210SBarry Smith 
4024165533cSJose E. Roman   Input Parameters:
40395fce210SBarry Smith + sf  - star forest
404cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
40595fce210SBarry Smith 
40695fce210SBarry Smith   Level: advanced
40795fce210SBarry Smith 
40820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
40995fce210SBarry Smith @*/
410d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411d71ae5a4SJacob Faibussowitsch {
41295fce210SBarry Smith   PetscFunctionBegin;
41395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41495fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
41528b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41695fce210SBarry Smith   sf->rankorder = flg;
4173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41895fce210SBarry Smith }
41995fce210SBarry Smith 
4205d83a8b1SBarry Smith /*@
42195fce210SBarry Smith   PetscSFSetGraph - Set a parallel star forest
42295fce210SBarry Smith 
42395fce210SBarry Smith   Collective
42495fce210SBarry Smith 
4254165533cSJose E. Roman   Input Parameters:
42695fce210SBarry Smith + sf         - star forest
42795fce210SBarry Smith . nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
42895fce210SBarry Smith . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
42920662ed9SBarry Smith . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
430c4e6a40aSLawrence Mitchell during setup in debug mode)
43120662ed9SBarry Smith . localmode  - copy mode for `ilocal`
432c4e6a40aSLawrence Mitchell . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
433c4e6a40aSLawrence Mitchell during setup in debug mode)
43420662ed9SBarry Smith - remotemode - copy mode for `iremote`
43595fce210SBarry Smith 
43695fce210SBarry Smith   Level: intermediate
43795fce210SBarry Smith 
43895452b02SPatrick Sanan   Notes:
43920662ed9SBarry Smith   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
44038ab3f8aSBarry Smith 
44120662ed9SBarry Smith   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44220662ed9SBarry Smith   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
443db2b9530SVaclav Hapla   PETSc might modify the respective array;
44420662ed9SBarry Smith   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
445cab54364SBarry Smith   Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
446db2b9530SVaclav Hapla 
44738b5cf2dSJacob Faibussowitsch   Fortran Notes:
44820662ed9SBarry Smith   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449c4e6a40aSLawrence Mitchell 
45038b5cf2dSJacob Faibussowitsch   Developer Notes:
451db2b9530SVaclav Hapla   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
45220662ed9SBarry Smith   This also allows to compare leaf sets of two `PetscSF`s easily.
45372bf8598SVaclav Hapla 
45420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
45595fce210SBarry Smith @*/
456d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
457d71ae5a4SJacob Faibussowitsch {
458db2b9530SVaclav Hapla   PetscBool unique, contiguous;
45995fce210SBarry Smith 
46095fce210SBarry Smith   PetscFunctionBegin;
46195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4624f572ea9SToby Isaac   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4634f572ea9SToby Isaac   if (nleaves > 0) PetscAssertPointer(iremote, 6);
46408401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46508401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4668da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4678da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4688da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4698da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47029046d53SLisandro Dalcin 
4712a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4729566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4732a67d2daSStefano Zampini   }
4742a67d2daSStefano Zampini 
4759566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
4766497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
4776497c311SBarry Smith     PetscMPIInt size;
4786497c311SBarry Smith 
4796497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
4806497c311SBarry Smith     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
4816497c311SBarry Smith   }
48229046d53SLisandro Dalcin 
48395fce210SBarry Smith   sf->nroots  = nroots;
48495fce210SBarry Smith   sf->nleaves = nleaves;
48529046d53SLisandro Dalcin 
486db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
487db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
488db2b9530SVaclav Hapla 
4899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4909566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491db2b9530SVaclav Hapla     ilocal = tlocal;
492db2b9530SVaclav Hapla   }
493db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
494db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
495db2b9530SVaclav Hapla 
4969566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4979566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498db2b9530SVaclav Hapla     iremote = tremote;
499db2b9530SVaclav Hapla   }
500db2b9530SVaclav Hapla 
50129046d53SLisandro Dalcin   if (nleaves && ilocal) {
502db2b9530SVaclav Hapla     PetscSFNode work;
503db2b9530SVaclav Hapla 
5049566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
5059566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506db2b9530SVaclav Hapla     unique = PetscNot(unique);
507db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
509db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
510db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
51129046d53SLisandro Dalcin   } else {
51229046d53SLisandro Dalcin     sf->minleaf = 0;
51329046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
514db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
515db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
51629046d53SLisandro Dalcin   }
51729046d53SLisandro Dalcin 
518db2b9530SVaclav Hapla   if (contiguous) {
519db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
520db2b9530SVaclav Hapla       ilocal = NULL;
521db2b9530SVaclav Hapla     } else {
5229566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
523db2b9530SVaclav Hapla     }
524db2b9530SVaclav Hapla   }
525db2b9530SVaclav Hapla   sf->mine = ilocal;
526db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
52729046d53SLisandro Dalcin     sf->mine_alloc = NULL;
528db2b9530SVaclav Hapla   } else {
529db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
53095fce210SBarry Smith   }
5316497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
5326497c311SBarry Smith     PetscMPIInt size;
5336497c311SBarry Smith 
5346497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
5356497c311SBarry Smith     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
5366497c311SBarry Smith   }
537db2b9530SVaclav Hapla   sf->remote = iremote;
538db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
53929046d53SLisandro Dalcin     sf->remote_alloc = NULL;
540db2b9530SVaclav Hapla   } else {
541db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
54295fce210SBarry Smith   }
5439566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
54429046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
5453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54695fce210SBarry Smith }
54795fce210SBarry Smith 
54829046d53SLisandro Dalcin /*@
549cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550dd5b3ca6SJunchao Zhang 
551dd5b3ca6SJunchao Zhang   Collective
552dd5b3ca6SJunchao Zhang 
553dd5b3ca6SJunchao Zhang   Input Parameters:
554cab54364SBarry Smith + sf      - The `PetscSF`
555cab54364SBarry Smith . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557cab54364SBarry Smith 
558cab54364SBarry Smith   Level: intermediate
559dd5b3ca6SJunchao Zhang 
560dd5b3ca6SJunchao Zhang   Notes:
56120662ed9SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
56220662ed9SBarry Smith   `n` and `N` are the local and global sizes of `x` respectively.
563dd5b3ca6SJunchao Zhang 
56420662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
56520662ed9SBarry Smith   sequential vectors `y` on all MPI processes.
566dd5b3ca6SJunchao Zhang 
56720662ed9SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
56820662ed9SBarry Smith   sequential vector `y` on rank 0.
569dd5b3ca6SJunchao Zhang 
57020662ed9SBarry Smith   In above cases, entries of `x` are roots and entries of `y` are leaves.
571dd5b3ca6SJunchao Zhang 
57220662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
573dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
574cab54364SBarry Smith   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
575dd5b3ca6SJunchao Zhang   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
576cab54364SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
577dd5b3ca6SJunchao Zhang 
578dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
579dd5b3ca6SJunchao Zhang 
580cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
581dd5b3ca6SJunchao Zhang  @*/
582d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
583d71ae5a4SJacob Faibussowitsch {
584dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
585dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
586dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
587dd5b3ca6SJunchao Zhang   PetscSFType type;
588dd5b3ca6SJunchao Zhang 
589dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5902abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5914f572ea9SToby Isaac   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5929566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5932c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5949566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
596dd5b3ca6SJunchao Zhang 
597dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
598dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
5999566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
6009566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
6019566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
6029566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
603dd5b3ca6SJunchao Zhang   } else {
6049566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
6059566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
606dd5b3ca6SJunchao Zhang     res[0] = n;
607dd5b3ca6SJunchao Zhang     res[1] = -n;
608dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
6091c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
610dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
611dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
612dd5b3ca6SJunchao Zhang     } else {
613dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
614dd5b3ca6SJunchao Zhang     }
6159566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
616dd5b3ca6SJunchao Zhang   }
6179566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
618dd5b3ca6SJunchao Zhang 
619dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
620dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
621dd5b3ca6SJunchao Zhang 
622dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
623dd5b3ca6SJunchao Zhang      Also set other easy stuff.
624dd5b3ca6SJunchao Zhang    */
625dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
626dd5b3ca6SJunchao Zhang     sf->nleaves = N;
627dd5b3ca6SJunchao Zhang     sf->nroots  = n;
628dd5b3ca6SJunchao Zhang     sf->nranks  = size;
629dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
630dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
631dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
632dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
633dd5b3ca6SJunchao Zhang     sf->nroots  = n;
634dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
635dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
636dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
637dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
638dd5b3ca6SJunchao Zhang     sf->nleaves = size;
639dd5b3ca6SJunchao Zhang     sf->nroots  = size;
640dd5b3ca6SJunchao Zhang     sf->nranks  = size;
641dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
642dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
643dd5b3ca6SJunchao Zhang   }
644dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
645dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
6463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
647dd5b3ca6SJunchao Zhang }
648dd5b3ca6SJunchao Zhang 
649dd5b3ca6SJunchao Zhang /*@
650cab54364SBarry Smith   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
65195fce210SBarry Smith 
65295fce210SBarry Smith   Collective
65395fce210SBarry Smith 
6544165533cSJose E. Roman   Input Parameter:
65595fce210SBarry Smith . sf - star forest to invert
65695fce210SBarry Smith 
6574165533cSJose E. Roman   Output Parameter:
65820662ed9SBarry Smith . isf - inverse of `sf`
6594165533cSJose E. Roman 
66095fce210SBarry Smith   Level: advanced
66195fce210SBarry Smith 
66295fce210SBarry Smith   Notes:
66395fce210SBarry Smith   All roots must have degree 1.
66495fce210SBarry Smith 
66595fce210SBarry Smith   The local space may be a permutation, but cannot be sparse.
66695fce210SBarry Smith 
66720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
66895fce210SBarry Smith @*/
669d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
670d71ae5a4SJacob Faibussowitsch {
67195fce210SBarry Smith   PetscMPIInt     rank;
67295fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
67395fce210SBarry Smith   const PetscInt *ilocal;
67495fce210SBarry Smith   PetscSFNode    *roots, *leaves;
67595fce210SBarry Smith 
67695fce210SBarry Smith   PetscFunctionBegin;
67729046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
67829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
6794f572ea9SToby Isaac   PetscAssertPointer(isf, 2);
68029046d53SLisandro Dalcin 
6819566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
68229046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
68329046d53SLisandro Dalcin 
6849566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
686ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
68795fce210SBarry Smith     leaves[i].rank  = rank;
68895fce210SBarry Smith     leaves[i].index = i;
68995fce210SBarry Smith   }
69095fce210SBarry Smith   for (i = 0; i < nroots; i++) {
69195fce210SBarry Smith     roots[i].rank  = -1;
69295fce210SBarry Smith     roots[i].index = -1;
69395fce210SBarry Smith   }
6946497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
6956497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
69695fce210SBarry Smith 
69795fce210SBarry Smith   /* Check whether our leaves are sparse */
6989371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
6999371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
70095fce210SBarry Smith   if (count == nroots) newilocal = NULL;
7019371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
70295fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
70395fce210SBarry Smith       if (roots[i].rank >= 0) {
70495fce210SBarry Smith         newilocal[count]   = i;
70595fce210SBarry Smith         roots[count].rank  = roots[i].rank;
70695fce210SBarry Smith         roots[count].index = roots[i].index;
70795fce210SBarry Smith         count++;
70895fce210SBarry Smith       }
70995fce210SBarry Smith     }
71095fce210SBarry Smith   }
71195fce210SBarry Smith 
7129566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7139566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7149566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
7153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
71695fce210SBarry Smith }
71795fce210SBarry Smith 
71895fce210SBarry Smith /*@
719cab54364SBarry Smith   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
72095fce210SBarry Smith 
72195fce210SBarry Smith   Collective
72295fce210SBarry Smith 
7234165533cSJose E. Roman   Input Parameters:
72495fce210SBarry Smith + sf  - communication object to duplicate
725cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
72695fce210SBarry Smith 
7274165533cSJose E. Roman   Output Parameter:
72895fce210SBarry Smith . newsf - new communication object
72995fce210SBarry Smith 
73095fce210SBarry Smith   Level: beginner
73195fce210SBarry Smith 
73220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
73395fce210SBarry Smith @*/
734d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
735d71ae5a4SJacob Faibussowitsch {
73629046d53SLisandro Dalcin   PetscSFType  type;
73797929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
73895fce210SBarry Smith 
73995fce210SBarry Smith   PetscFunctionBegin;
74029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
74129046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
7424f572ea9SToby Isaac   PetscAssertPointer(newsf, 3);
7439566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7449566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7459566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
74635cb6cd3SPierre Jolivet   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
74795fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
748dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
749dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
75095fce210SBarry Smith       PetscInt           nroots, nleaves;
75195fce210SBarry Smith       const PetscInt    *ilocal;
75295fce210SBarry Smith       const PetscSFNode *iremote;
7539566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7549566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
755dd5b3ca6SJunchao Zhang     } else {
7569566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
757dd5b3ca6SJunchao Zhang     }
75895fce210SBarry Smith   }
75997929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7609566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
76197929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
76297929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
76397929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
76497929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
76597929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
76697929ea7SJunchao Zhang 
76720c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
76820c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
76971438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
77020c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
77120c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
77220c24465SJunchao Zhang #endif
773dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
77420c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
77695fce210SBarry Smith }
77795fce210SBarry Smith 
77895fce210SBarry Smith /*@C
77995fce210SBarry Smith   PetscSFGetGraph - Get the graph specifying a parallel star forest
78095fce210SBarry Smith 
78195fce210SBarry Smith   Not Collective
78295fce210SBarry Smith 
7834165533cSJose E. Roman   Input Parameter:
78495fce210SBarry Smith . sf - star forest
78595fce210SBarry Smith 
7864165533cSJose E. Roman   Output Parameters:
78795fce210SBarry Smith + nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
78895fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process
78920662ed9SBarry Smith . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
79095fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
79195fce210SBarry Smith 
792cab54364SBarry Smith   Level: intermediate
793cab54364SBarry Smith 
794373e0d91SLisandro Dalcin   Notes:
79520662ed9SBarry Smith   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
796373e0d91SLisandro Dalcin 
79720662ed9SBarry Smith   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
798db2b9530SVaclav Hapla 
7998dbb0df6SBarry Smith   Fortran Notes:
80020662ed9SBarry Smith   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
80120662ed9SBarry Smith   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
8028dbb0df6SBarry Smith 
80320662ed9SBarry Smith   To check for a `NULL` `ilocal` use
8048dbb0df6SBarry Smith $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
805ca797d7aSLawrence Mitchell 
80620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
80795fce210SBarry Smith @*/
808d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
809d71ae5a4SJacob Faibussowitsch {
81095fce210SBarry Smith   PetscFunctionBegin;
81195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
812b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
813f4f49eeaSPierre Jolivet     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
814b8dee149SJunchao Zhang   } else {
81595fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
81695fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
81795fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
81895fce210SBarry Smith     if (iremote) *iremote = sf->remote;
819b8dee149SJunchao Zhang   }
8203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
82195fce210SBarry Smith }
82295fce210SBarry Smith 
82329046d53SLisandro Dalcin /*@
82495fce210SBarry Smith   PetscSFGetLeafRange - Get the active leaf ranges
82595fce210SBarry Smith 
82695fce210SBarry Smith   Not Collective
82795fce210SBarry Smith 
8284165533cSJose E. Roman   Input Parameter:
82995fce210SBarry Smith . sf - star forest
83095fce210SBarry Smith 
8314165533cSJose E. Roman   Output Parameters:
83220662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
83320662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
83495fce210SBarry Smith 
83595fce210SBarry Smith   Level: developer
83695fce210SBarry Smith 
83720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
83895fce210SBarry Smith @*/
839d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
840d71ae5a4SJacob Faibussowitsch {
84195fce210SBarry Smith   PetscFunctionBegin;
84295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
84329046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
84495fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
84595fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
8463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
84795fce210SBarry Smith }
84895fce210SBarry Smith 
849ffeef943SBarry Smith /*@
850cab54364SBarry Smith   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
851fe2efc57SMark 
85220f4b53cSBarry Smith   Collective
853fe2efc57SMark 
854fe2efc57SMark   Input Parameters:
855fe2efc57SMark + A    - the star forest
856cab54364SBarry Smith . obj  - Optional object that provides the prefix for the option names
857736c3998SJose E. Roman - name - command line option
858fe2efc57SMark 
859fe2efc57SMark   Level: intermediate
860cab54364SBarry Smith 
86120662ed9SBarry Smith   Note:
86220662ed9SBarry Smith   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
86320662ed9SBarry Smith 
864db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
865fe2efc57SMark @*/
866d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
867d71ae5a4SJacob Faibussowitsch {
868fe2efc57SMark   PetscFunctionBegin;
869fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8709566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
872fe2efc57SMark }
873fe2efc57SMark 
874ffeef943SBarry Smith /*@
87595fce210SBarry Smith   PetscSFView - view a star forest
87695fce210SBarry Smith 
87795fce210SBarry Smith   Collective
87895fce210SBarry Smith 
8794165533cSJose E. Roman   Input Parameters:
88095fce210SBarry Smith + sf     - star forest
881cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
88295fce210SBarry Smith 
88395fce210SBarry Smith   Level: beginner
88495fce210SBarry Smith 
885cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
88695fce210SBarry Smith @*/
887d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
888d71ae5a4SJacob Faibussowitsch {
88995fce210SBarry Smith   PetscBool         iascii;
89095fce210SBarry Smith   PetscViewerFormat format;
89195fce210SBarry Smith 
89295fce210SBarry Smith   PetscFunctionBegin;
89395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8949566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
89595fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
89695fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
8979566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8989566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
89953dd6d7dSJunchao Zhang   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
90095fce210SBarry Smith     PetscMPIInt rank;
9016497c311SBarry Smith     PetscInt    j;
90295fce210SBarry Smith 
9039566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
9049566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
905dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
90680153354SVaclav Hapla       if (!sf->graphset) {
9079566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
9089566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
9093ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
91080153354SVaclav Hapla       }
9119566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9129566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9136497c311SBarry Smith       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
9146497c311SBarry Smith       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%d,%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, (PetscMPIInt)sf->remote[i].rank, sf->remote[i].index));
9159566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9169566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
91795fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
91881bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
9196497c311SBarry Smith 
9209566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9219566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
9226497c311SBarry Smith         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
9239566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9249566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
9256497c311SBarry Smith         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
9266497c311SBarry Smith           PetscMPIInt i = perm[ii];
9276497c311SBarry Smith 
9289566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
92948a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
93095fce210SBarry Smith         }
9319566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
93295fce210SBarry Smith       }
9339566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9349566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
935dd5b3ca6SJunchao Zhang     }
9369566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
93795fce210SBarry Smith   }
938dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
9393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
94095fce210SBarry Smith }
94195fce210SBarry Smith 
94295fce210SBarry Smith /*@C
943dec1416fSJunchao Zhang   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
94495fce210SBarry Smith 
94595fce210SBarry Smith   Not Collective
94695fce210SBarry Smith 
9474165533cSJose E. Roman   Input Parameter:
94895fce210SBarry Smith . sf - star forest
94995fce210SBarry Smith 
9504165533cSJose E. Roman   Output Parameters:
95195fce210SBarry Smith + nranks  - number of ranks referenced by local part
95220662ed9SBarry Smith . ranks   - [`nranks`] array of ranks
95320662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
9546497c311SBarry Smith . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
9556497c311SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
95695fce210SBarry Smith 
95795fce210SBarry Smith   Level: developer
95895fce210SBarry Smith 
959cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
96095fce210SBarry Smith @*/
9616497c311SBarry Smith PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
962d71ae5a4SJacob Faibussowitsch {
96395fce210SBarry Smith   PetscFunctionBegin;
96495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
96528b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
966dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9679927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
968dec1416fSJunchao Zhang   } else {
969dec1416fSJunchao Zhang     /* The generic implementation */
97095fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
97195fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
97295fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
97395fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
97495fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
975dec1416fSJunchao Zhang   }
9763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
97795fce210SBarry Smith }
97895fce210SBarry Smith 
9798750ddebSJunchao Zhang /*@C
9808750ddebSJunchao Zhang   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9818750ddebSJunchao Zhang 
9828750ddebSJunchao Zhang   Not Collective
9838750ddebSJunchao Zhang 
9844165533cSJose E. Roman   Input Parameter:
9858750ddebSJunchao Zhang . sf - star forest
9868750ddebSJunchao Zhang 
9874165533cSJose E. Roman   Output Parameters:
9888750ddebSJunchao Zhang + niranks  - number of leaf ranks referencing roots on this process
98920662ed9SBarry Smith . iranks   - [`niranks`] array of ranks
99020662ed9SBarry Smith . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
99120662ed9SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
9928750ddebSJunchao Zhang 
9938750ddebSJunchao Zhang   Level: developer
9948750ddebSJunchao Zhang 
995cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
9968750ddebSJunchao Zhang @*/
9976497c311SBarry Smith PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
998d71ae5a4SJacob Faibussowitsch {
9998750ddebSJunchao Zhang   PetscFunctionBegin;
10008750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
100128b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
10028750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
10039927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
10048750ddebSJunchao Zhang   } else {
10058750ddebSJunchao Zhang     PetscSFType type;
10069566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
100798921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
10088750ddebSJunchao Zhang   }
10093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10108750ddebSJunchao Zhang }
10118750ddebSJunchao Zhang 
1012d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1013d71ae5a4SJacob Faibussowitsch {
1014b5a8e515SJed Brown   PetscInt i;
1015b5a8e515SJed Brown   for (i = 0; i < n; i++) {
1016b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
1017b5a8e515SJed Brown   }
1018b5a8e515SJed Brown   return PETSC_FALSE;
1019b5a8e515SJed Brown }
1020b5a8e515SJed Brown 
102195fce210SBarry Smith /*@C
1022cab54364SBarry Smith   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
102321c688dcSJed Brown 
102421c688dcSJed Brown   Collective
102521c688dcSJed Brown 
10264165533cSJose E. Roman   Input Parameters:
1027cab54364SBarry Smith + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1028cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
102921c688dcSJed Brown 
103021c688dcSJed Brown   Level: developer
103121c688dcSJed Brown 
1032cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
103321c688dcSJed Brown @*/
1034d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1035d71ae5a4SJacob Faibussowitsch {
1036eec179cfSJacob Faibussowitsch   PetscHMapI    table;
1037eec179cfSJacob Faibussowitsch   PetscHashIter pos;
10386497c311SBarry Smith   PetscMPIInt   size, groupsize, *groupranks, *ranks;
10396497c311SBarry Smith   PetscInt     *rcount;
10406497c311SBarry Smith   PetscInt      irank, sfnrank, ranksi;
10416497c311SBarry Smith   PetscMPIInt   i, orank = -1;
104221c688dcSJed Brown 
104321c688dcSJed Brown   PetscFunctionBegin;
104421c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
104529046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10469566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1047eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(10, &table));
104821c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
104921c688dcSJed Brown     /* Log 1-based rank */
1050eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
105121c688dcSJed Brown   }
10526497c311SBarry Smith   PetscCall(PetscHMapIGetSize(table, &sfnrank));
10536497c311SBarry Smith   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
10549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1056eec179cfSJacob Faibussowitsch   PetscHashIterBegin(table, pos);
105721c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10586497c311SBarry Smith     PetscHashIterGetKey(table, pos, ranksi);
10596497c311SBarry Smith     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1060eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(table, pos, rcount[i]);
1061eec179cfSJacob Faibussowitsch     PetscHashIterNext(table, pos);
106221c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
106321c688dcSJed Brown   }
1064eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&table));
1065b5a8e515SJed Brown 
1066b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1067b5a8e515SJed Brown   {
10687fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1069b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10706497c311SBarry Smith 
10719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1075b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10769566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10779566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10789566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1079b5a8e515SJed Brown   }
1080b5a8e515SJed Brown 
1081b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1082b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1083b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1084b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1085b5a8e515SJed Brown     }
1086b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1087b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1088b5a8e515SJed Brown     }
1089b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
10906497c311SBarry Smith       PetscMPIInt tmprank;
10916497c311SBarry Smith       PetscInt    tmpcount;
1092247e8311SStefano Zampini 
1093b5a8e515SJed Brown       tmprank             = ranks[i];
1094b5a8e515SJed Brown       tmpcount            = rcount[i];
1095b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1096b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1097b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1098b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1099b5a8e515SJed Brown       sf->ndranks++;
1100b5a8e515SJed Brown     }
1101b5a8e515SJed Brown   }
11029566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
11036497c311SBarry Smith   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
11046497c311SBarry Smith   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
110521c688dcSJed Brown   sf->roffset[0] = 0;
110621c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
11079566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
110821c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
110921c688dcSJed Brown     rcount[i]          = 0;
111021c688dcSJed Brown   }
1111247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1112247e8311SStefano Zampini     /* short circuit */
1113247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
111421c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
11156497c311SBarry Smith       PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1116b5a8e515SJed Brown       if (irank < 0) {
11176497c311SBarry Smith         PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1118b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
111921c688dcSJed Brown       }
11206497c311SBarry Smith       orank = (PetscMPIInt)sf->remote[i].rank;
1121247e8311SStefano Zampini     }
11226497c311SBarry Smith     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", (PetscMPIInt)sf->remote[i].rank);
112321c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
112421c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
112521c688dcSJed Brown     rcount[irank]++;
112621c688dcSJed Brown   }
11279566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
11283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
112921c688dcSJed Brown }
113021c688dcSJed Brown 
113121c688dcSJed Brown /*@C
113295fce210SBarry Smith   PetscSFGetGroups - gets incoming and outgoing process groups
113395fce210SBarry Smith 
113495fce210SBarry Smith   Collective
113595fce210SBarry Smith 
11364165533cSJose E. Roman   Input Parameter:
113795fce210SBarry Smith . sf - star forest
113895fce210SBarry Smith 
11394165533cSJose E. Roman   Output Parameters:
114095fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
114195fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
114295fce210SBarry Smith 
114395fce210SBarry Smith   Level: developer
114495fce210SBarry Smith 
1145cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
114695fce210SBarry Smith @*/
1147d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1148d71ae5a4SJacob Faibussowitsch {
11497fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
115095fce210SBarry Smith 
115195fce210SBarry Smith   PetscFunctionBegin;
115208401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
115395fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
115495fce210SBarry Smith     PetscInt        i;
115595fce210SBarry Smith     const PetscInt *indegree;
11566497c311SBarry Smith     PetscMPIInt     rank, *outranks, *inranks, indegree0;
115795fce210SBarry Smith     PetscSFNode    *remote;
115895fce210SBarry Smith     PetscSF         bgcount;
115995fce210SBarry Smith 
116095fce210SBarry Smith     /* Compute the number of incoming ranks */
11619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
116295fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
116395fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
116495fce210SBarry Smith       remote[i].index = 0;
116595fce210SBarry Smith     }
11669566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11679566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11689566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11699566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
117095fce210SBarry Smith     /* Enumerate the incoming ranks */
11719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
117395fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11749566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11759566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11769566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11776497c311SBarry Smith     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
11786497c311SBarry Smith     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
11799566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11809566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11819566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
118295fce210SBarry Smith   }
118395fce210SBarry Smith   *incoming = sf->ingroup;
118495fce210SBarry Smith 
118595fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11879566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11889566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
118995fce210SBarry Smith   }
119095fce210SBarry Smith   *outgoing = sf->outgroup;
11913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119295fce210SBarry Smith }
119395fce210SBarry Smith 
119429046d53SLisandro Dalcin /*@
11950dd791a8SStefano Zampini   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11960dd791a8SStefano Zampini 
11970dd791a8SStefano Zampini   Collective
11980dd791a8SStefano Zampini 
11990dd791a8SStefano Zampini   Input Parameter:
12000dd791a8SStefano Zampini . sf - star forest
12010dd791a8SStefano Zampini 
12020dd791a8SStefano Zampini   Output Parameter:
12030dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications
12040dd791a8SStefano Zampini 
12050dd791a8SStefano Zampini   Level: developer
12060dd791a8SStefano Zampini 
12070dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
12080dd791a8SStefano Zampini @*/
12090dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
12100dd791a8SStefano Zampini {
12110dd791a8SStefano Zampini   PetscFunctionBegin;
12120dd791a8SStefano Zampini   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12130dd791a8SStefano Zampini   PetscAssertPointer(rsf, 2);
12140dd791a8SStefano Zampini   if (!sf->rankssf) {
12150dd791a8SStefano Zampini     PetscSFNode       *rremotes;
12160dd791a8SStefano Zampini     const PetscMPIInt *ranks;
12176497c311SBarry Smith     PetscMPIInt        nranks;
12180dd791a8SStefano Zampini 
12190dd791a8SStefano Zampini     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12200dd791a8SStefano Zampini     PetscCall(PetscMalloc1(nranks, &rremotes));
12210dd791a8SStefano Zampini     for (PetscInt i = 0; i < nranks; i++) {
12220dd791a8SStefano Zampini       rremotes[i].rank  = ranks[i];
12230dd791a8SStefano Zampini       rremotes[i].index = 0;
12240dd791a8SStefano Zampini     }
12250dd791a8SStefano Zampini     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12260dd791a8SStefano Zampini     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12270dd791a8SStefano Zampini   }
12280dd791a8SStefano Zampini   *rsf = sf->rankssf;
12290dd791a8SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
12300dd791a8SStefano Zampini }
12310dd791a8SStefano Zampini 
12320dd791a8SStefano Zampini /*@
1233cab54364SBarry Smith   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
123495fce210SBarry Smith 
123595fce210SBarry Smith   Collective
123695fce210SBarry Smith 
12374165533cSJose E. Roman   Input Parameter:
123895fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
123995fce210SBarry Smith 
12404165533cSJose E. Roman   Output Parameter:
124195fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1
124295fce210SBarry Smith 
124395fce210SBarry Smith   Level: developer
124495fce210SBarry Smith 
1245cab54364SBarry Smith   Note:
1246cab54364SBarry Smith   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
124795fce210SBarry Smith   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
124895fce210SBarry Smith   edge, it is a candidate for future optimization that might involve its removal.
124995fce210SBarry Smith 
1250cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
125195fce210SBarry Smith @*/
1252d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1253d71ae5a4SJacob Faibussowitsch {
125495fce210SBarry Smith   PetscFunctionBegin;
125595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12564f572ea9SToby Isaac   PetscAssertPointer(multi, 2);
125795fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12589566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
125995fce210SBarry Smith     *multi           = sf->multi;
1260013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12613ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
126295fce210SBarry Smith   }
126395fce210SBarry Smith   if (!sf->multi) {
126495fce210SBarry Smith     const PetscInt *indegree;
12659837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
126695fce210SBarry Smith     PetscSFNode    *remote;
126729046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12689566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12699566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
127195fce210SBarry Smith     inoffset[0] = 0;
127295fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12739837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
12749566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12759566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
127695fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
127776bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1278ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
127976bd3646SJed Brown     }
12809566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
128195fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
128295fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
128338e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
128495fce210SBarry Smith     }
12859566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1286013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12879566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
128895fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
128995fce210SBarry Smith       PetscMPIInt  rank;
129095fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
129195fce210SBarry Smith       PetscSFNode *newremote;
12929566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
129395fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12949566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12959837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12969566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12979566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
129895fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
129995fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
130095fce210SBarry Smith         PetscInt j;
130195fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
13028e3a54c0SPierre Jolivet         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
130395fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
130495fce210SBarry Smith       }
13059566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13069566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13079566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
130895fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
130995fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
131001365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
131195fce210SBarry Smith       }
13129566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
13139566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
131495fce210SBarry Smith     }
13159566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
131695fce210SBarry Smith   }
131795fce210SBarry Smith   *multi = sf->multi;
13183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131995fce210SBarry Smith }
132095fce210SBarry Smith 
132195fce210SBarry Smith /*@C
132220662ed9SBarry Smith   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
132395fce210SBarry Smith 
132495fce210SBarry Smith   Collective
132595fce210SBarry Smith 
13264165533cSJose E. Roman   Input Parameters:
132795fce210SBarry Smith + sf        - original star forest
1328ba2a7774SJunchao Zhang . nselected - number of selected roots on this process
1329ba2a7774SJunchao Zhang - selected  - indices of the selected roots on this process
133095fce210SBarry Smith 
13314165533cSJose E. Roman   Output Parameter:
1332cd620004SJunchao Zhang . esf - new star forest
133395fce210SBarry Smith 
133495fce210SBarry Smith   Level: advanced
133595fce210SBarry Smith 
133695fce210SBarry Smith   Note:
1337cab54364SBarry Smith   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
133895fce210SBarry Smith   be done by calling PetscSFGetGraph().
133995fce210SBarry Smith 
1340cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
134195fce210SBarry Smith @*/
1342d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1343d71ae5a4SJacob Faibussowitsch {
1344cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1345cd620004SJunchao Zhang   const PetscInt    *ilocal;
1346cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1347ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1348f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1349f659e5c7SJunchao Zhang   MPI_Comm           comm;
135095fce210SBarry Smith 
135195fce210SBarry Smith   PetscFunctionBegin;
135295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
135329046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
13544f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
13554f572ea9SToby Isaac   PetscAssertPointer(esf, 4);
13560511a646SMatthew G. Knepley 
13579566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
13589566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13599566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13609566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1361cd620004SJunchao Zhang 
136276bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1363cd620004SJunchao Zhang     PetscBool dups;
13649566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
136528b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1366511e6246SStefano Zampini     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1367cd620004SJunchao Zhang   }
1368f659e5c7SJunchao Zhang 
1369dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1370dbbe0bcdSBarry Smith   else {
1371cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
13729566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1373cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
13749566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13758e3a54c0SPierre Jolivet     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1376cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1377cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13789566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13799566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1380ba2a7774SJunchao Zhang 
1381cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1382cd620004SJunchao Zhang     esf_nleaves = 0;
1383cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1384cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1385cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1386cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1387cd620004SJunchao Zhang       */
1388cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1389cd620004SJunchao Zhang     }
13909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1392cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1393cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1394cd620004SJunchao Zhang       if (leafdata[j]) {
1395cd620004SJunchao Zhang         new_ilocal[n]        = j;
1396cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1397cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1398fc1ede2bSMatthew G. Knepley         ++n;
139995fce210SBarry Smith       }
140095fce210SBarry Smith     }
14019566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
14029566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
14039566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
14049566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1405f659e5c7SJunchao Zhang   }
14069566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
14073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140895fce210SBarry Smith }
140995fce210SBarry Smith 
14102f5fb4c2SMatthew G. Knepley /*@C
141120662ed9SBarry Smith   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
14122f5fb4c2SMatthew G. Knepley 
14132f5fb4c2SMatthew G. Knepley   Collective
14142f5fb4c2SMatthew G. Knepley 
14154165533cSJose E. Roman   Input Parameters:
14162f5fb4c2SMatthew G. Knepley + sf        - original star forest
1417f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process
1418f659e5c7SJunchao Zhang - selected  - indices of the selected leaves on this process
14192f5fb4c2SMatthew G. Knepley 
14204165533cSJose E. Roman   Output Parameter:
14212f5fb4c2SMatthew G. Knepley . newsf - new star forest
14222f5fb4c2SMatthew G. Knepley 
14232f5fb4c2SMatthew G. Knepley   Level: advanced
14242f5fb4c2SMatthew G. Knepley 
1425cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14262f5fb4c2SMatthew G. Knepley @*/
1427d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1428d71ae5a4SJacob Faibussowitsch {
1429f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1430f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1431f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1432f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1433f659e5c7SJunchao Zhang   MPI_Comm           comm;
14342f5fb4c2SMatthew G. Knepley 
14352f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
14362f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
143729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
14384f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
14394f572ea9SToby Isaac   PetscAssertPointer(newsf, 4);
14402f5fb4c2SMatthew G. Knepley 
1441f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
14429566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
14449566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
14459566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
144608401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1447f659e5c7SJunchao Zhang 
1448f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1449dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1450dbbe0bcdSBarry Smith   else {
14519566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14529566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
14539566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1454f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1455f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1456f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1457f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1458f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
14592f5fb4c2SMatthew G. Knepley     }
14609566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14619566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1462f659e5c7SJunchao Zhang   }
14639566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
14643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14652f5fb4c2SMatthew G. Knepley }
14662f5fb4c2SMatthew G. Knepley 
146795fce210SBarry Smith /*@C
1468cab54364SBarry Smith   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14693482bfa8SJunchao Zhang 
1470c3339decSBarry Smith   Collective
14713482bfa8SJunchao Zhang 
14724165533cSJose E. Roman   Input Parameters:
14733482bfa8SJunchao Zhang + sf       - star forest on which to communicate
14743482bfa8SJunchao Zhang . unit     - data type associated with each node
14753482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14763482bfa8SJunchao Zhang - op       - operation to use for reduction
14773482bfa8SJunchao Zhang 
14784165533cSJose E. Roman   Output Parameter:
14793482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14803482bfa8SJunchao Zhang 
14813482bfa8SJunchao Zhang   Level: intermediate
14823482bfa8SJunchao Zhang 
148320662ed9SBarry Smith   Note:
148420662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1485da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
1486cab54364SBarry Smith   use `PetscSFBcastWithMemTypeBegin()` instead.
1487cab54364SBarry Smith 
1488cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14893482bfa8SJunchao Zhang @*/
1490d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1491d71ae5a4SJacob Faibussowitsch {
1492eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14933482bfa8SJunchao Zhang 
14943482bfa8SJunchao Zhang   PetscFunctionBegin;
14953482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14969566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14979566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14989566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
14999566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1500dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15019566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15033482bfa8SJunchao Zhang }
15043482bfa8SJunchao Zhang 
15053482bfa8SJunchao Zhang /*@C
150620662ed9SBarry Smith   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
150720662ed9SBarry Smith   to `PetscSFBcastEnd()`
1508d0295fc0SJunchao Zhang 
1509c3339decSBarry Smith   Collective
1510d0295fc0SJunchao Zhang 
15114165533cSJose E. Roman   Input Parameters:
1512d0295fc0SJunchao Zhang + sf        - star forest on which to communicate
1513d0295fc0SJunchao Zhang . unit      - data type associated with each node
1514d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1515d0295fc0SJunchao Zhang . rootdata  - buffer to broadcast
1516d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1517d0295fc0SJunchao Zhang - op        - operation to use for reduction
1518d0295fc0SJunchao Zhang 
15194165533cSJose E. Roman   Output Parameter:
1520d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1521d0295fc0SJunchao Zhang 
1522d0295fc0SJunchao Zhang   Level: intermediate
1523d0295fc0SJunchao Zhang 
1524cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1525d0295fc0SJunchao Zhang @*/
1526d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1527d71ae5a4SJacob Faibussowitsch {
1528d0295fc0SJunchao Zhang   PetscFunctionBegin;
1529d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15309566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15319566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1532dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15339566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1535d0295fc0SJunchao Zhang }
1536d0295fc0SJunchao Zhang 
1537d0295fc0SJunchao Zhang /*@C
153820662ed9SBarry Smith   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15393482bfa8SJunchao Zhang 
15403482bfa8SJunchao Zhang   Collective
15413482bfa8SJunchao Zhang 
15424165533cSJose E. Roman   Input Parameters:
15433482bfa8SJunchao Zhang + sf       - star forest
15443482bfa8SJunchao Zhang . unit     - data type
15453482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15463482bfa8SJunchao Zhang - op       - operation to use for reduction
15473482bfa8SJunchao Zhang 
15484165533cSJose E. Roman   Output Parameter:
15493482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15503482bfa8SJunchao Zhang 
15513482bfa8SJunchao Zhang   Level: intermediate
15523482bfa8SJunchao Zhang 
1553cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15543482bfa8SJunchao Zhang @*/
1555d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1556d71ae5a4SJacob Faibussowitsch {
15573482bfa8SJunchao Zhang   PetscFunctionBegin;
15583482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15599566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1560dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15619566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15633482bfa8SJunchao Zhang }
15643482bfa8SJunchao Zhang 
15653482bfa8SJunchao Zhang /*@C
1566cab54364SBarry Smith   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
156795fce210SBarry Smith 
156895fce210SBarry Smith   Collective
156995fce210SBarry Smith 
15704165533cSJose E. Roman   Input Parameters:
157195fce210SBarry Smith + sf       - star forest
157295fce210SBarry Smith . unit     - data type
157395fce210SBarry Smith . leafdata - values to reduce
157495fce210SBarry Smith - op       - reduction operation
157595fce210SBarry Smith 
15764165533cSJose E. Roman   Output Parameter:
157795fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
157895fce210SBarry Smith 
157995fce210SBarry Smith   Level: intermediate
158095fce210SBarry Smith 
158120662ed9SBarry Smith   Note:
158220662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1583da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
1584cab54364SBarry Smith   use `PetscSFReduceWithMemTypeBegin()` instead.
1585d0295fc0SJunchao Zhang 
158620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
158795fce210SBarry Smith @*/
1588d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1589d71ae5a4SJacob Faibussowitsch {
1590eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
159195fce210SBarry Smith 
159295fce210SBarry Smith   PetscFunctionBegin;
159395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15949566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15959566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15969566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15979566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1598f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15999566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
160195fce210SBarry Smith }
160295fce210SBarry Smith 
160395fce210SBarry Smith /*@C
1604cab54364SBarry Smith   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1605d0295fc0SJunchao Zhang 
1606d0295fc0SJunchao Zhang   Collective
1607d0295fc0SJunchao Zhang 
16084165533cSJose E. Roman   Input Parameters:
1609d0295fc0SJunchao Zhang + sf        - star forest
1610d0295fc0SJunchao Zhang . unit      - data type
1611d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1612d0295fc0SJunchao Zhang . leafdata  - values to reduce
1613d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1614d0295fc0SJunchao Zhang - op        - reduction operation
1615d0295fc0SJunchao Zhang 
16164165533cSJose E. Roman   Output Parameter:
1617d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1618d0295fc0SJunchao Zhang 
1619d0295fc0SJunchao Zhang   Level: intermediate
1620d0295fc0SJunchao Zhang 
162120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1622d0295fc0SJunchao Zhang @*/
1623d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1624d71ae5a4SJacob Faibussowitsch {
1625d0295fc0SJunchao Zhang   PetscFunctionBegin;
1626d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16279566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16289566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1629f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16309566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1632d0295fc0SJunchao Zhang }
1633d0295fc0SJunchao Zhang 
1634d0295fc0SJunchao Zhang /*@C
163520662ed9SBarry Smith   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
163695fce210SBarry Smith 
163795fce210SBarry Smith   Collective
163895fce210SBarry Smith 
16394165533cSJose E. Roman   Input Parameters:
164095fce210SBarry Smith + sf       - star forest
164195fce210SBarry Smith . unit     - data type
164295fce210SBarry Smith . leafdata - values to reduce
164395fce210SBarry Smith - op       - reduction operation
164495fce210SBarry Smith 
16454165533cSJose E. Roman   Output Parameter:
164695fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
164795fce210SBarry Smith 
164895fce210SBarry Smith   Level: intermediate
164995fce210SBarry Smith 
165020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
165195fce210SBarry Smith @*/
1652d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1653d71ae5a4SJacob Faibussowitsch {
165495fce210SBarry Smith   PetscFunctionBegin;
165595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16569566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1657dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16589566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
166095fce210SBarry Smith }
166195fce210SBarry Smith 
166295fce210SBarry Smith /*@C
1663cab54364SBarry Smith   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1664cab54364SBarry Smith   to be completed with `PetscSFFetchAndOpEnd()`
1665a1729e3fSJunchao Zhang 
1666a1729e3fSJunchao Zhang   Collective
1667a1729e3fSJunchao Zhang 
16684165533cSJose E. Roman   Input Parameters:
1669a1729e3fSJunchao Zhang + sf       - star forest
1670a1729e3fSJunchao Zhang . unit     - data type
1671a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1672a1729e3fSJunchao Zhang - op       - operation to use for reduction
1673a1729e3fSJunchao Zhang 
16744165533cSJose E. Roman   Output Parameters:
1675a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1676a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1677a1729e3fSJunchao Zhang 
1678a1729e3fSJunchao Zhang   Level: advanced
1679a1729e3fSJunchao Zhang 
1680a1729e3fSJunchao Zhang   Note:
1681a1729e3fSJunchao Zhang   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1682a1729e3fSJunchao Zhang   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1683a1729e3fSJunchao Zhang   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1684a1729e3fSJunchao Zhang   integers.
1685a1729e3fSJunchao Zhang 
1686cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1687a1729e3fSJunchao Zhang @*/
1688d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1689d71ae5a4SJacob Faibussowitsch {
1690eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1691a1729e3fSJunchao Zhang 
1692a1729e3fSJunchao Zhang   PetscFunctionBegin;
1693a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16949566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16959566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16969566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16979566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
16989566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
169908401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1700dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17019566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1703a1729e3fSJunchao Zhang }
1704a1729e3fSJunchao Zhang 
1705a1729e3fSJunchao Zhang /*@C
1706cab54364SBarry Smith   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1707cab54364SBarry Smith   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1708d3b3e55cSJunchao Zhang 
1709d3b3e55cSJunchao Zhang   Collective
1710d3b3e55cSJunchao Zhang 
1711d3b3e55cSJunchao Zhang   Input Parameters:
1712d3b3e55cSJunchao Zhang + sf              - star forest
1713d3b3e55cSJunchao Zhang . unit            - data type
1714d3b3e55cSJunchao Zhang . rootmtype       - memory type of rootdata
1715d3b3e55cSJunchao Zhang . leafmtype       - memory type of leafdata
1716d3b3e55cSJunchao Zhang . leafdata        - leaf values to use in reduction
1717d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate
1718d3b3e55cSJunchao Zhang - op              - operation to use for reduction
1719d3b3e55cSJunchao Zhang 
1720d3b3e55cSJunchao Zhang   Output Parameters:
1721d3b3e55cSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1722d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1723d3b3e55cSJunchao Zhang 
1724d3b3e55cSJunchao Zhang   Level: advanced
1725d3b3e55cSJunchao Zhang 
1726cab54364SBarry Smith   Note:
1727cab54364SBarry Smith   See `PetscSFFetchAndOpBegin()` for more details.
1728d3b3e55cSJunchao Zhang 
172920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1730d3b3e55cSJunchao Zhang @*/
1731d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1732d71ae5a4SJacob Faibussowitsch {
1733d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1734d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17359566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
17369566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
173708401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1738dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17399566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1741d3b3e55cSJunchao Zhang }
1742d3b3e55cSJunchao Zhang 
1743d3b3e55cSJunchao Zhang /*@C
174420662ed9SBarry Smith   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
174520662ed9SBarry Smith   to fetch values from roots and update atomically by applying operation using my leaf value
1746a1729e3fSJunchao Zhang 
1747a1729e3fSJunchao Zhang   Collective
1748a1729e3fSJunchao Zhang 
17494165533cSJose E. Roman   Input Parameters:
1750a1729e3fSJunchao Zhang + sf       - star forest
1751a1729e3fSJunchao Zhang . unit     - data type
1752a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1753a1729e3fSJunchao Zhang - op       - operation to use for reduction
1754a1729e3fSJunchao Zhang 
17554165533cSJose E. Roman   Output Parameters:
1756a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1757a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1758a1729e3fSJunchao Zhang 
1759a1729e3fSJunchao Zhang   Level: advanced
1760a1729e3fSJunchao Zhang 
176120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1762a1729e3fSJunchao Zhang @*/
1763d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1764d71ae5a4SJacob Faibussowitsch {
1765a1729e3fSJunchao Zhang   PetscFunctionBegin;
1766a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17679566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1768dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17699566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1771a1729e3fSJunchao Zhang }
1772a1729e3fSJunchao Zhang 
1773a1729e3fSJunchao Zhang /*@C
1774cab54364SBarry Smith   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
177595fce210SBarry Smith 
177695fce210SBarry Smith   Collective
177795fce210SBarry Smith 
17784165533cSJose E. Roman   Input Parameter:
177995fce210SBarry Smith . sf - star forest
178095fce210SBarry Smith 
17814165533cSJose E. Roman   Output Parameter:
178295fce210SBarry Smith . degree - degree of each root vertex
178395fce210SBarry Smith 
178495fce210SBarry Smith   Level: advanced
178595fce210SBarry Smith 
1786cab54364SBarry Smith   Note:
178720662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1788ffe67aa5SVáclav Hapla 
1789cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
179095fce210SBarry Smith @*/
17916497c311SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1792d71ae5a4SJacob Faibussowitsch {
179395fce210SBarry Smith   PetscFunctionBegin;
179495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
179595fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
17964f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
1797803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
17985b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
179928b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
18005b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
18019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
18029566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
180329046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
18049837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
18059566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
180695fce210SBarry Smith   }
180795fce210SBarry Smith   *degree = NULL;
18083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
180995fce210SBarry Smith }
181095fce210SBarry Smith 
181195fce210SBarry Smith /*@C
1812cab54364SBarry Smith   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
181395fce210SBarry Smith 
181495fce210SBarry Smith   Collective
181595fce210SBarry Smith 
18164165533cSJose E. Roman   Input Parameter:
181795fce210SBarry Smith . sf - star forest
181895fce210SBarry Smith 
18194165533cSJose E. Roman   Output Parameter:
182095fce210SBarry Smith . degree - degree of each root vertex
182195fce210SBarry Smith 
182295fce210SBarry Smith   Level: developer
182395fce210SBarry Smith 
1824cab54364SBarry Smith   Note:
182520662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1826ffe67aa5SVáclav Hapla 
1827cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
182895fce210SBarry Smith @*/
1829d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1830d71ae5a4SJacob Faibussowitsch {
183195fce210SBarry Smith   PetscFunctionBegin;
183295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
183395fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
18344f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
183595fce210SBarry Smith   if (!sf->degreeknown) {
183628b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18379566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18389566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
183995fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
184095fce210SBarry Smith   }
184195fce210SBarry Smith   *degree = sf->degree;
18423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
184395fce210SBarry Smith }
184495fce210SBarry Smith 
1845673100f5SVaclav Hapla /*@C
184620662ed9SBarry Smith   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
184766dfcd1aSVaclav Hapla   Each multi-root is assigned index of the corresponding original root.
1848673100f5SVaclav Hapla 
1849673100f5SVaclav Hapla   Collective
1850673100f5SVaclav Hapla 
18514165533cSJose E. Roman   Input Parameters:
1852673100f5SVaclav Hapla + sf     - star forest
1853cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1854673100f5SVaclav Hapla 
18554165533cSJose E. Roman   Output Parameters:
185620662ed9SBarry Smith + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
185720662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1858673100f5SVaclav Hapla 
1859673100f5SVaclav Hapla   Level: developer
1860673100f5SVaclav Hapla 
1861cab54364SBarry Smith   Note:
186220662ed9SBarry Smith   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1863ffe67aa5SVáclav Hapla 
1864cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1865673100f5SVaclav Hapla @*/
1866d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1867d71ae5a4SJacob Faibussowitsch {
1868673100f5SVaclav Hapla   PetscSF  msf;
1869673100f5SVaclav Hapla   PetscInt i, j, k, nroots, nmroots;
1870673100f5SVaclav Hapla 
1871673100f5SVaclav Hapla   PetscFunctionBegin;
1872673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18739566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18744f572ea9SToby Isaac   if (nroots) PetscAssertPointer(degree, 2);
18754f572ea9SToby Isaac   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18764f572ea9SToby Isaac   PetscAssertPointer(multiRootsOrigNumbering, 4);
18779566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
18789566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1880673100f5SVaclav Hapla   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1881673100f5SVaclav Hapla     if (!degree[i]) continue;
1882ad540459SPierre Jolivet     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1883673100f5SVaclav Hapla   }
188408401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
188566dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
18863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1887673100f5SVaclav Hapla }
1888673100f5SVaclav Hapla 
188995fce210SBarry Smith /*@C
1890cab54364SBarry Smith   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
189195fce210SBarry Smith 
189295fce210SBarry Smith   Collective
189395fce210SBarry Smith 
18944165533cSJose E. Roman   Input Parameters:
189595fce210SBarry Smith + sf       - star forest
189695fce210SBarry Smith . unit     - data type
189795fce210SBarry Smith - leafdata - leaf data to gather to roots
189895fce210SBarry Smith 
18994165533cSJose E. Roman   Output Parameter:
190095fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
190195fce210SBarry Smith 
190295fce210SBarry Smith   Level: intermediate
190395fce210SBarry Smith 
1904cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
190595fce210SBarry Smith @*/
1906d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1907d71ae5a4SJacob Faibussowitsch {
1908a5526d50SJunchao Zhang   PetscSF multi = NULL;
190995fce210SBarry Smith 
191095fce210SBarry Smith   PetscFunctionBegin;
191195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19129566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19139566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19149566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191695fce210SBarry Smith }
191795fce210SBarry Smith 
191895fce210SBarry Smith /*@C
1919cab54364SBarry Smith   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
192095fce210SBarry Smith 
192195fce210SBarry Smith   Collective
192295fce210SBarry Smith 
19234165533cSJose E. Roman   Input Parameters:
192495fce210SBarry Smith + sf       - star forest
192595fce210SBarry Smith . unit     - data type
192695fce210SBarry Smith - leafdata - leaf data to gather to roots
192795fce210SBarry Smith 
19284165533cSJose E. Roman   Output Parameter:
192995fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
193095fce210SBarry Smith 
193195fce210SBarry Smith   Level: intermediate
193295fce210SBarry Smith 
1933cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
193495fce210SBarry Smith @*/
1935d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1936d71ae5a4SJacob Faibussowitsch {
1937a5526d50SJunchao Zhang   PetscSF multi = NULL;
193895fce210SBarry Smith 
193995fce210SBarry Smith   PetscFunctionBegin;
194095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19419566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19429566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
194495fce210SBarry Smith }
194595fce210SBarry Smith 
194695fce210SBarry Smith /*@C
1947cab54364SBarry Smith   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
194895fce210SBarry Smith 
194995fce210SBarry Smith   Collective
195095fce210SBarry Smith 
19514165533cSJose E. Roman   Input Parameters:
195295fce210SBarry Smith + sf            - star forest
195395fce210SBarry Smith . unit          - data type
195495fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
195595fce210SBarry Smith 
19564165533cSJose E. Roman   Output Parameter:
195795fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
195895fce210SBarry Smith 
195995fce210SBarry Smith   Level: intermediate
196095fce210SBarry Smith 
196120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
196295fce210SBarry Smith @*/
1963d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1964d71ae5a4SJacob Faibussowitsch {
1965a5526d50SJunchao Zhang   PetscSF multi = NULL;
196695fce210SBarry Smith 
196795fce210SBarry Smith   PetscFunctionBegin;
196895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19699566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19709566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19719566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
197395fce210SBarry Smith }
197495fce210SBarry Smith 
197595fce210SBarry Smith /*@C
1976cab54364SBarry Smith   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
197795fce210SBarry Smith 
197895fce210SBarry Smith   Collective
197995fce210SBarry Smith 
19804165533cSJose E. Roman   Input Parameters:
198195fce210SBarry Smith + sf            - star forest
198295fce210SBarry Smith . unit          - data type
198395fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
198495fce210SBarry Smith 
19854165533cSJose E. Roman   Output Parameter:
198695fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
198795fce210SBarry Smith 
198895fce210SBarry Smith   Level: intermediate
198995fce210SBarry Smith 
199020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
199195fce210SBarry Smith @*/
1992d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1993d71ae5a4SJacob Faibussowitsch {
1994a5526d50SJunchao Zhang   PetscSF multi = NULL;
199595fce210SBarry Smith 
199695fce210SBarry Smith   PetscFunctionBegin;
199795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19989566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19999566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
20003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
200195fce210SBarry Smith }
2002a7b3aa13SAta Mesgarnejad 
2003d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2004d71ae5a4SJacob Faibussowitsch {
2005a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
2006a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
2007a072220fSLawrence Mitchell   PetscHSetI      seen;
2008a072220fSLawrence Mitchell 
2009a072220fSLawrence Mitchell   PetscFunctionBegin;
2010b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
20119566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
20129566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
2013a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
2014a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
20159566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
2016a072220fSLawrence Mitchell     }
20179566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
201808401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
20199566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
2020b458e8f1SJose E. Roman   }
20213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2022a072220fSLawrence Mitchell }
202354729392SStefano Zampini 
2024a7b3aa13SAta Mesgarnejad /*@
2025cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2026a7b3aa13SAta Mesgarnejad 
2027a7b3aa13SAta Mesgarnejad   Input Parameters:
2028cab54364SBarry Smith + sfA - The first `PetscSF`
2029cab54364SBarry Smith - sfB - The second `PetscSF`
2030a7b3aa13SAta Mesgarnejad 
20312fe279fdSBarry Smith   Output Parameter:
2032cab54364SBarry Smith . sfBA - The composite `PetscSF`
2033a7b3aa13SAta Mesgarnejad 
2034a7b3aa13SAta Mesgarnejad   Level: developer
2035a7b3aa13SAta Mesgarnejad 
2036a072220fSLawrence Mitchell   Notes:
2037cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
203854729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
203954729392SStefano Zampini 
204020662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
204120662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
204220662ed9SBarry Smith   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
204320662ed9SBarry Smith   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2044a072220fSLawrence Mitchell 
2045db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2046a7b3aa13SAta Mesgarnejad @*/
2047d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2048d71ae5a4SJacob Faibussowitsch {
2049a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
2050d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
205154729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
205254729392SStefano Zampini   PetscInt          *localPointsBA;
205354729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
205454729392SStefano Zampini   PetscBool          denseB;
2055a7b3aa13SAta Mesgarnejad 
2056a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
2057a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
205829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
205929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
206029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
206154729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
20624f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
20639566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20649566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
206554729392SStefano Zampini 
20669566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20679566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
206820662ed9SBarry Smith   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
206920662ed9SBarry Smith      numRootsB; otherwise, garbage will be broadcasted.
207020662ed9SBarry Smith      Example (comm size = 1):
207120662ed9SBarry Smith      sfA: 0 <- (0, 0)
207220662ed9SBarry Smith      sfB: 100 <- (0, 0)
207320662ed9SBarry Smith           101 <- (0, 1)
207420662ed9SBarry Smith      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
207520662ed9SBarry Smith      of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
207620662ed9SBarry Smith      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
207720662ed9SBarry Smith      remotePointsA; if not recasted, point 101 would receive a garbage value.             */
20789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
207954729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
208054729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
208154729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
208254729392SStefano Zampini   }
208354729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
20840ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20850ea77edaSksagiyam 
20860ea77edaSksagiyam     if (localp >= numRootsB) continue;
20870ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
208854729392SStefano Zampini   }
2089d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20909566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20920ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20930ea77edaSksagiyam     leafdataB[i].rank  = -1;
20940ea77edaSksagiyam     leafdataB[i].index = -1;
20950ea77edaSksagiyam   }
20966497c311SBarry Smith   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20976497c311SBarry Smith   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20989566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2099d41018fbSJunchao Zhang 
210054729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
210154729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
210254729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
210354729392SStefano Zampini     else numLeavesBA++;
210454729392SStefano Zampini   }
210554729392SStefano Zampini   if (denseB) {
2106d41018fbSJunchao Zhang     localPointsBA  = NULL;
2107d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2108d41018fbSJunchao Zhang   } else {
21099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
21109566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
211154729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
211254729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
211354729392SStefano Zampini 
211454729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
211554729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
211654729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
211754729392SStefano Zampini       numLeavesBA++;
211854729392SStefano Zampini     }
21199566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2120d41018fbSJunchao Zhang   }
21219566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21229566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21239566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2125a7b3aa13SAta Mesgarnejad }
21261c6ba672SJunchao Zhang 
212704c0ada0SJunchao Zhang /*@
2128cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
212904c0ada0SJunchao Zhang 
213004c0ada0SJunchao Zhang   Input Parameters:
2131cab54364SBarry Smith + sfA - The first `PetscSF`
2132cab54364SBarry Smith - sfB - The second `PetscSF`
213304c0ada0SJunchao Zhang 
21342fe279fdSBarry Smith   Output Parameter:
2135cab54364SBarry Smith . sfBA - The composite `PetscSF`.
213604c0ada0SJunchao Zhang 
213704c0ada0SJunchao Zhang   Level: developer
213804c0ada0SJunchao Zhang 
213954729392SStefano Zampini   Notes:
214020662ed9SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
214154729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
214220662ed9SBarry Smith   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
214354729392SStefano Zampini 
214420662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
214520662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
214620662ed9SBarry Smith   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
214720662ed9SBarry Smith   on `sfA`, then
214820662ed9SBarry Smith   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
214954729392SStefano Zampini 
2150db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
215104c0ada0SJunchao Zhang @*/
2152d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2153d71ae5a4SJacob Faibussowitsch {
215404c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
215504c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
215604c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
215754729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
215854729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21595b0d146aSStefano Zampini   MPI_Op             op;
21605b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21615b0d146aSStefano Zampini   PetscBool iswin;
21625b0d146aSStefano Zampini #endif
216304c0ada0SJunchao Zhang 
216404c0ada0SJunchao Zhang   PetscFunctionBegin;
216504c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
216604c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
216704c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
216804c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
216954729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
21704f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
21719566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21729566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
217354729392SStefano Zampini 
21749566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21759566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21765b0d146aSStefano Zampini 
21775b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
21785b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
217983df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21805b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
21815b0d146aSStefano Zampini      the root condition is not meet.
21825b0d146aSStefano Zampini    */
21835b0d146aSStefano Zampini   op = MPI_MAXLOC;
21845b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21855b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21869566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
218783df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21885b0d146aSStefano Zampini #endif
21895b0d146aSStefano Zampini 
21909566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
219254729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
219354729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
219454729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
219554729392SStefano Zampini   }
219654729392SStefano Zampini   if (localPointsA) {
219754729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
219854729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
219954729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
220054729392SStefano Zampini     }
220154729392SStefano Zampini   } else {
220254729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
220354729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
220454729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
220554729392SStefano Zampini     }
220654729392SStefano Zampini   }
220754729392SStefano Zampini 
22089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
22099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
221054729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
221154729392SStefano Zampini     remotePointsBA[i].rank  = -1;
221254729392SStefano Zampini     remotePointsBA[i].index = -1;
221354729392SStefano Zampini   }
221454729392SStefano Zampini 
22156497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22166497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22179566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
221854729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
221954729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
222054729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
222154729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
222254729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
222354729392SStefano Zampini     numLeavesBA++;
222454729392SStefano Zampini   }
22259566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22269566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
22279566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
222904c0ada0SJunchao Zhang }
223004c0ada0SJunchao Zhang 
22311c6ba672SJunchao Zhang /*
2232cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22331c6ba672SJunchao Zhang 
22342fe279fdSBarry Smith   Input Parameter:
2235cab54364SBarry Smith . sf - The global `PetscSF`
22361c6ba672SJunchao Zhang 
22372fe279fdSBarry Smith   Output Parameter:
2238cab54364SBarry Smith . out - The local `PetscSF`
2239cab54364SBarry Smith 
2240cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`
22411c6ba672SJunchao Zhang  */
2242d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2243d71ae5a4SJacob Faibussowitsch {
22441c6ba672SJunchao Zhang   MPI_Comm           comm;
22451c6ba672SJunchao Zhang   PetscMPIInt        myrank;
22461c6ba672SJunchao Zhang   const PetscInt    *ilocal;
22471c6ba672SJunchao Zhang   const PetscSFNode *iremote;
22481c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
22491c6ba672SJunchao Zhang   PetscSFNode       *liremote;
22501c6ba672SJunchao Zhang   PetscSF            lsf;
22511c6ba672SJunchao Zhang 
22521c6ba672SJunchao Zhang   PetscFunctionBegin;
22531c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2254dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2255dbbe0bcdSBarry Smith   else {
22561c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22579566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22589566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22591c6ba672SJunchao Zhang 
22601c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
22619566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22629371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
22639371c9d4SSatish Balay       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
22649371c9d4SSatish Balay     }
22659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
22669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
22671c6ba672SJunchao Zhang 
22681c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
22691c6ba672SJunchao Zhang       if (iremote[i].rank == (PetscInt)myrank) {
22701c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22711c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
22721c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
22731c6ba672SJunchao Zhang         j++;
22741c6ba672SJunchao Zhang       }
22751c6ba672SJunchao Zhang     }
22769566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22779566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
22789566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22799566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
22801c6ba672SJunchao Zhang     *out = lsf;
22811c6ba672SJunchao Zhang   }
22823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22831c6ba672SJunchao Zhang }
2284dd5b3ca6SJunchao Zhang 
2285dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2286d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2287d71ae5a4SJacob Faibussowitsch {
2288eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2289dd5b3ca6SJunchao Zhang 
2290dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2291dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22929566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
22939566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22949566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
22959566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2296dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22979566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
22983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2299dd5b3ca6SJunchao Zhang }
2300dd5b3ca6SJunchao Zhang 
2301157edd7aSVaclav Hapla /*@
2302cab54364SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2303157edd7aSVaclav Hapla 
2304157edd7aSVaclav Hapla   Input Parameters:
2305157edd7aSVaclav Hapla + comm        - the communicator
2306cab54364SBarry Smith . nsfs        - the number of input `PetscSF`
2307cab54364SBarry Smith . sfs         - the array of input `PetscSF`
23081f40158dSVaclav Hapla . rootMode    - the root mode specifying how roots are handled
230920662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2310157edd7aSVaclav Hapla 
23112fe279fdSBarry Smith   Output Parameter:
2312cab54364SBarry Smith . newsf - The resulting `PetscSF`
2313157edd7aSVaclav Hapla 
23141f40158dSVaclav Hapla   Level: advanced
2315157edd7aSVaclav Hapla 
2316157edd7aSVaclav Hapla   Notes:
231720662ed9SBarry Smith   The communicator of all `PetscSF`s in `sfs` must be comm.
2318157edd7aSVaclav Hapla 
231920662ed9SBarry Smith   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
232020662ed9SBarry Smith 
232120662ed9SBarry Smith   The offsets in `leafOffsets` are added to the original leaf indices.
232220662ed9SBarry Smith 
232320662ed9SBarry Smith   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
232420662ed9SBarry Smith   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
232520662ed9SBarry Smith 
232620662ed9SBarry Smith   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2327157edd7aSVaclav Hapla   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2328157edd7aSVaclav Hapla 
232920662ed9SBarry Smith   All root modes retain the essential connectivity condition.
233020662ed9SBarry Smith   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
233120662ed9SBarry Smith   Parameter `rootMode` controls how the input root spaces are combined.
233220662ed9SBarry Smith   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
233320662ed9SBarry Smith   and is also the same in the output `PetscSF`.
23341f40158dSVaclav Hapla   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23351f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
233620662ed9SBarry Smith   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23371f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23381593df67SStefano Zampini   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
23391f40158dSVaclav Hapla   the original root ranks are ignored.
23401f40158dSVaclav Hapla   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
234120662ed9SBarry Smith   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
234220662ed9SBarry Smith   to keep the load balancing.
234320662ed9SBarry Smith   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
23441f40158dSVaclav Hapla 
23451f40158dSVaclav Hapla   Example:
23461f40158dSVaclav Hapla   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
234720662ed9SBarry Smith .vb
234820662ed9SBarry Smith   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
234920662ed9SBarry Smith   for m in {local,global,shared}; do
235020662ed9SBarry Smith     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
235120662ed9SBarry Smith   done
235220662ed9SBarry Smith .ve
235320662ed9SBarry Smith   we generate two identical `PetscSF`s sf_0 and sf_1,
235420662ed9SBarry Smith .vb
235520662ed9SBarry Smith   PetscSF Object: sf_0 2 MPI processes
235620662ed9SBarry Smith     type: basic
235720662ed9SBarry Smith     rank #leaves #roots
235820662ed9SBarry Smith     [ 0]       4      2
235920662ed9SBarry Smith     [ 1]       4      2
236020662ed9SBarry Smith     leaves      roots       roots in global numbering
236120662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
236220662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
236320662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
236420662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
236520662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
236620662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
236720662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
236820662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
236920662ed9SBarry Smith .ve
2370e33f79d8SJacob Faibussowitsch   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
237120662ed9SBarry Smith .vb
237220662ed9SBarry Smith   rootMode = local:
237320662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
237420662ed9SBarry Smith     type: basic
237520662ed9SBarry Smith     rank #leaves #roots
237620662ed9SBarry Smith     [ 0]       8      4
237720662ed9SBarry Smith     [ 1]       8      4
237820662ed9SBarry Smith     leaves      roots       roots in global numbering
237920662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
238020662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
238120662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   4
238220662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   5
238320662ed9SBarry Smith     ( 0,  4) <- ( 0,  2)  =   2
238420662ed9SBarry Smith     ( 0,  5) <- ( 0,  3)  =   3
238520662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
238620662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
238720662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
238820662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
238920662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   4
239020662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   5
239120662ed9SBarry Smith     ( 1,  4) <- ( 0,  2)  =   2
239220662ed9SBarry Smith     ( 1,  5) <- ( 0,  3)  =   3
239320662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
239420662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
239520662ed9SBarry Smith 
239620662ed9SBarry Smith   rootMode = global:
239720662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
239820662ed9SBarry Smith     type: basic
239920662ed9SBarry Smith     rank #leaves #roots
240020662ed9SBarry Smith     [ 0]       8      4
240120662ed9SBarry Smith     [ 1]       8      4
240220662ed9SBarry Smith     leaves      roots       roots in global numbering
240320662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
240420662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
240520662ed9SBarry Smith     ( 0,  2) <- ( 0,  2)  =   2
240620662ed9SBarry Smith     ( 0,  3) <- ( 0,  3)  =   3
240720662ed9SBarry Smith     ( 0,  4) <- ( 1,  0)  =   4
240820662ed9SBarry Smith     ( 0,  5) <- ( 1,  1)  =   5
240920662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
241020662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
241120662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
241220662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
241320662ed9SBarry Smith     ( 1,  2) <- ( 0,  2)  =   2
241420662ed9SBarry Smith     ( 1,  3) <- ( 0,  3)  =   3
241520662ed9SBarry Smith     ( 1,  4) <- ( 1,  0)  =   4
241620662ed9SBarry Smith     ( 1,  5) <- ( 1,  1)  =   5
241720662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
241820662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
241920662ed9SBarry Smith 
242020662ed9SBarry Smith   rootMode = shared:
242120662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
242220662ed9SBarry Smith     type: basic
242320662ed9SBarry Smith     rank #leaves #roots
242420662ed9SBarry Smith     [ 0]       8      2
242520662ed9SBarry Smith     [ 1]       8      2
242620662ed9SBarry Smith     leaves      roots       roots in global numbering
242720662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
242820662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
242920662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
243020662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
243120662ed9SBarry Smith     ( 0,  4) <- ( 0,  0)  =   0
243220662ed9SBarry Smith     ( 0,  5) <- ( 0,  1)  =   1
243320662ed9SBarry Smith     ( 0,  6) <- ( 1,  0)  =   2
243420662ed9SBarry Smith     ( 0,  7) <- ( 1,  1)  =   3
243520662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
243620662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
243720662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
243820662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
243920662ed9SBarry Smith     ( 1,  4) <- ( 0,  0)  =   0
244020662ed9SBarry Smith     ( 1,  5) <- ( 0,  1)  =   1
244120662ed9SBarry Smith     ( 1,  6) <- ( 1,  0)  =   2
244220662ed9SBarry Smith     ( 1,  7) <- ( 1,  1)  =   3
244320662ed9SBarry Smith .ve
24441f40158dSVaclav Hapla 
24451f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2446157edd7aSVaclav Hapla @*/
24471f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2448d71ae5a4SJacob Faibussowitsch {
2449157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2450157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2451157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2452157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2453157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
24541f40158dSVaclav Hapla   PetscLayout  glayout         = NULL;
24551f40158dSVaclav Hapla   PetscInt    *gremote         = NULL;
24561f40158dSVaclav Hapla   PetscMPIInt  rank, size;
2457157edd7aSVaclav Hapla 
2458157edd7aSVaclav Hapla   PetscFunctionBegin;
245912f479c1SVaclav Hapla   if (PetscDefined(USE_DEBUG)) {
2460157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2461157edd7aSVaclav Hapla 
24629566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2463157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24644f572ea9SToby Isaac     PetscAssertPointer(sfs, 3);
2465157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2466157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2467157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2468157edd7aSVaclav Hapla     }
24691f40158dSVaclav Hapla     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24704f572ea9SToby Isaac     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24714f572ea9SToby Isaac     PetscAssertPointer(newsf, 6);
24729566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2473157edd7aSVaclav Hapla   }
2474157edd7aSVaclav Hapla   if (!nsfs) {
24759566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
24769566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24773ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2478157edd7aSVaclav Hapla   }
24799566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
24801f40158dSVaclav Hapla   PetscCallMPI(MPI_Comm_size(comm, &size));
2481157edd7aSVaclav Hapla 
24821f40158dSVaclav Hapla   /* Calculate leaf array offsets */
24839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2484157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2485157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2486157edd7aSVaclav Hapla     PetscInt nl;
2487157edd7aSVaclav Hapla 
24889566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2489157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2490157edd7aSVaclav Hapla   }
2491157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2492157edd7aSVaclav Hapla 
24931f40158dSVaclav Hapla   /* Calculate number of roots */
24941f40158dSVaclav Hapla   switch (rootMode) {
24951f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
24961f40158dSVaclav Hapla     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
24971f40158dSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
24981f40158dSVaclav Hapla       for (s = 1; s < nsfs; s++) {
24991f40158dSVaclav Hapla         PetscInt nr;
25001f40158dSVaclav Hapla 
25011f40158dSVaclav Hapla         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
25021f40158dSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
25031f40158dSVaclav Hapla       }
25041f40158dSVaclav Hapla     }
25051f40158dSVaclav Hapla   } break;
25061f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
25071f40158dSVaclav Hapla     /* Calculate also global layout in this case */
25081f40158dSVaclav Hapla     PetscInt    *nls;
25091f40158dSVaclav Hapla     PetscLayout *lts;
25101f40158dSVaclav Hapla     PetscInt   **inds;
25111f40158dSVaclav Hapla     PetscInt     j;
25121f40158dSVaclav Hapla     PetscInt     rootOffset = 0;
25131f40158dSVaclav Hapla 
25141f40158dSVaclav Hapla     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
25151f40158dSVaclav Hapla     PetscCall(PetscLayoutCreate(comm, &glayout));
25161f40158dSVaclav Hapla     glayout->bs = 1;
25171f40158dSVaclav Hapla     glayout->n  = 0;
25181f40158dSVaclav Hapla     glayout->N  = 0;
25191f40158dSVaclav Hapla     for (s = 0; s < nsfs; s++) {
25201f40158dSVaclav Hapla       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
25211f40158dSVaclav Hapla       glayout->n += lts[s]->n;
25221f40158dSVaclav Hapla       glayout->N += lts[s]->N;
25231f40158dSVaclav Hapla     }
25241f40158dSVaclav Hapla     PetscCall(PetscLayoutSetUp(glayout));
25251f40158dSVaclav Hapla     PetscCall(PetscMalloc1(nLeaves, &gremote));
25261f40158dSVaclav Hapla     for (s = 0, j = 0; s < nsfs; s++) {
25271f40158dSVaclav Hapla       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25281f40158dSVaclav Hapla       rootOffset += lts[s]->N;
25291f40158dSVaclav Hapla       PetscCall(PetscLayoutDestroy(&lts[s]));
25301f40158dSVaclav Hapla       PetscCall(PetscFree(inds[s]));
25311f40158dSVaclav Hapla     }
25321f40158dSVaclav Hapla     PetscCall(PetscFree3(lts, nls, inds));
25331f40158dSVaclav Hapla     nRoots = glayout->N;
25341f40158dSVaclav Hapla   } break;
25351f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25361f40158dSVaclav Hapla     /* nRoots calculated later in this case */
25371f40158dSVaclav Hapla     break;
25381f40158dSVaclav Hapla   default:
25391f40158dSVaclav Hapla     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25401f40158dSVaclav Hapla   }
25411f40158dSVaclav Hapla 
2542157edd7aSVaclav Hapla   if (!leafOffsets) {
2543157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2544157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2545157edd7aSVaclav Hapla       const PetscInt *ilocal;
2546157edd7aSVaclav Hapla 
25479566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2548157edd7aSVaclav Hapla       if (ilocal) {
2549157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2550157edd7aSVaclav Hapla         break;
2551157edd7aSVaclav Hapla       }
2552157edd7aSVaclav Hapla     }
2553157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2554157edd7aSVaclav Hapla   }
2555157edd7aSVaclav Hapla 
2556157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2557157edd7aSVaclav Hapla   ilocal_new = NULL;
2558157edd7aSVaclav Hapla   if (!all_ilocal_null) {
25599566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2560157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2561157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2562157edd7aSVaclav Hapla       const PetscInt *ilocal;
25638e3a54c0SPierre Jolivet       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2564157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2565157edd7aSVaclav Hapla 
25669566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2567157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2568157edd7aSVaclav Hapla     }
2569157edd7aSVaclav Hapla   }
2570157edd7aSVaclav Hapla 
2571157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
25721f40158dSVaclav Hapla   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25731f40158dSVaclav Hapla     PetscInt rootOffset = 0;
25741f40158dSVaclav Hapla 
25759566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2576157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) {
2577157edd7aSVaclav Hapla       iremote_new[i].rank  = -1;
2578157edd7aSVaclav Hapla       iremote_new[i].index = -1;
2579157edd7aSVaclav Hapla     }
2580157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2581157edd7aSVaclav Hapla       PetscInt           i, nl, nr;
2582157edd7aSVaclav Hapla       PetscSF            tmp_sf;
2583157edd7aSVaclav Hapla       const PetscSFNode *iremote;
2584157edd7aSVaclav Hapla       PetscSFNode       *tmp_rootdata;
25858e3a54c0SPierre Jolivet       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2586157edd7aSVaclav Hapla 
25879566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25889566063dSJacob Faibussowitsch       PetscCall(PetscSFCreate(comm, &tmp_sf));
2589157edd7aSVaclav Hapla       /* create helper SF with contiguous leaves */
25909566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25919566063dSJacob Faibussowitsch       PetscCall(PetscSFSetUp(tmp_sf));
25929566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
25931f40158dSVaclav Hapla       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2594157edd7aSVaclav Hapla         for (i = 0; i < nr; i++) {
25951f40158dSVaclav Hapla           tmp_rootdata[i].index = i + rootOffset;
25966497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
2597157edd7aSVaclav Hapla         }
25981f40158dSVaclav Hapla         rootOffset += nr;
25991f40158dSVaclav Hapla       } else {
26001f40158dSVaclav Hapla         for (i = 0; i < nr; i++) {
26011f40158dSVaclav Hapla           tmp_rootdata[i].index = i;
26026497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
26031f40158dSVaclav Hapla         }
26041f40158dSVaclav Hapla       }
26056497c311SBarry Smith       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26066497c311SBarry Smith       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26079566063dSJacob Faibussowitsch       PetscCall(PetscSFDestroy(&tmp_sf));
26089566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp_rootdata));
2609157edd7aSVaclav Hapla     }
2610aa624791SPierre Jolivet     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2611157edd7aSVaclav Hapla 
2612157edd7aSVaclav Hapla     /* Build the new SF */
26139566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
26149566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
26151f40158dSVaclav Hapla   } else {
26161f40158dSVaclav Hapla     /* Build the new SF */
26171f40158dSVaclav Hapla     PetscCall(PetscSFCreate(comm, newsf));
26181f40158dSVaclav Hapla     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
26191f40158dSVaclav Hapla   }
26209566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
26211f40158dSVaclav Hapla   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26221f40158dSVaclav Hapla   PetscCall(PetscLayoutDestroy(&glayout));
26231f40158dSVaclav Hapla   PetscCall(PetscFree(gremote));
26249566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
26253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2626157edd7aSVaclav Hapla }
26278e54d7e8SToby Isaac 
26288e54d7e8SToby Isaac /*@
26298e54d7e8SToby Isaac   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
26308e54d7e8SToby Isaac 
26318e54d7e8SToby Isaac   Collective
26328e54d7e8SToby Isaac 
26338e54d7e8SToby Isaac   Input Parameters:
26348e54d7e8SToby Isaac + sf       - star forest
26358e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
26368e54d7e8SToby Isaac . rootdata - root data that will be used for muliple PetscSF communications
26378e54d7e8SToby Isaac - leafdata - leaf data that will be used for muliple PetscSF communications
26388e54d7e8SToby Isaac 
26398e54d7e8SToby Isaac   Level: advanced
26408e54d7e8SToby Isaac 
26418e54d7e8SToby Isaac   Notes:
26428e54d7e8SToby Isaac   Implementations of `PetscSF` can make optimizations
26438e54d7e8SToby Isaac   for repeated communication using the same memory regions, but these optimizations
26448e54d7e8SToby Isaac   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
26458e54d7e8SToby Isaac   The intended pattern is
26468e54d7e8SToby Isaac 
26478e54d7e8SToby Isaac .vb
26488e54d7e8SToby Isaac   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
26498e54d7e8SToby Isaac 
26508e54d7e8SToby Isaac   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
26518e54d7e8SToby Isaac   // repeated use of rootdata and leafdata will now be optimized
26528e54d7e8SToby Isaac 
26538e54d7e8SToby Isaac   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
26548e54d7e8SToby Isaac   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
26558e54d7e8SToby Isaac   // ...
26568e54d7e8SToby Isaac   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
26578e54d7e8SToby Isaac   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
26588e54d7e8SToby Isaac   // ... (other communications)
26598e54d7e8SToby Isaac 
26608e54d7e8SToby Isaac   // rootdata and leafdata must be deregistered before freeing
26618e54d7e8SToby Isaac   // skipping this can lead to undefined behavior including
26628e54d7e8SToby Isaac   // deadlocks
26638e54d7e8SToby Isaac   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
26648e54d7e8SToby Isaac 
26658e54d7e8SToby Isaac   // it is now safe to free rootdata and leafdata
26668e54d7e8SToby Isaac   PetscFree2(rootdata, leafdata);
26678e54d7e8SToby Isaac .ve
26688e54d7e8SToby Isaac 
26698e54d7e8SToby Isaac   If you do not register `rootdata` and `leafdata` it will not cause an error,
26708e54d7e8SToby Isaac   but optimizations that reduce the setup time for each communication cannot be
26718e54d7e8SToby Isaac   made.  Currently, the only implementation of `PetscSF` that benefits from
26728e54d7e8SToby Isaac   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
26738e54d7e8SToby Isaac   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
26748e54d7e8SToby Isaac 
26758e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
26768e54d7e8SToby Isaac @*/
26778e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26788e54d7e8SToby Isaac {
26798e54d7e8SToby Isaac   PetscFunctionBegin;
26808e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26818e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26828e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
26838e54d7e8SToby Isaac }
26848e54d7e8SToby Isaac 
26858e54d7e8SToby Isaac /*@
26868e54d7e8SToby Isaac   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
26878e54d7e8SToby Isaac 
26888e54d7e8SToby Isaac   Collective
26898e54d7e8SToby Isaac 
26908e54d7e8SToby Isaac   Input Parameters:
26918e54d7e8SToby Isaac + sf       - star forest
26928e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
26938e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
26948e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
26958e54d7e8SToby Isaac 
26968e54d7e8SToby Isaac   Level: advanced
26978e54d7e8SToby Isaac 
26988e54d7e8SToby Isaac   Note:
26998e54d7e8SToby Isaac   See `PetscSFRegisterPersistent()` for when/how to use this function.
27008e54d7e8SToby Isaac 
27018e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
27028e54d7e8SToby Isaac @*/
27038e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
27048e54d7e8SToby Isaac {
27058e54d7e8SToby Isaac   PetscFunctionBegin;
27068e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
27078e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
27088e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
27098e54d7e8SToby Isaac }
2710e1187f0dSToby Isaac 
2711e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2712e1187f0dSToby Isaac {
2713e1187f0dSToby Isaac   MPI_Aint lb, lb_true, bytes, bytes_true;
2714e1187f0dSToby Isaac 
2715e1187f0dSToby Isaac   PetscFunctionBegin;
2716e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2717e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2718e1187f0dSToby Isaac   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2719e1187f0dSToby Isaac   *size = bytes;
2720e1187f0dSToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
2721e1187f0dSToby Isaac }
2722